From b6df463884c7db1ab4089fef464cdd87c946a8d5 Mon Sep 17 00:00:00 2001 From: Jacan Chaplais Date: Tue, 3 Oct 2023 16:25:06 +0100 Subject: [PATCH 1/2] parallelised cluster coeff --- graphicle/calculate.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/graphicle/calculate.py b/graphicle/calculate.py index 9191bab..9d62e70 100644 --- a/graphicle/calculate.py +++ b/graphicle/calculate.py @@ -559,12 +559,12 @@ def _delta_R_symmetric( return result -@nb.njit("float32[:](bool_[:, :])") +@nb.njit("float32[:](bool_[:, :])", parallel=True) def _clust_coeffs(adj: base.BoolVector) -> base.FloatVector: num_nodes = adj.shape[0] coefs = np.empty(num_nodes, dtype=np.float32) - for node_idx, row in enumerate(adj): - neibs = np.flatnonzero(row) + for node_idx in nb.prange(num_nodes): + neibs = np.flatnonzero(adj[node_idx, :]) num_neibs = neibs.shape[0] if num_neibs < 2: coefs[node_idx] = 0.0 @@ -575,7 +575,10 @@ def _clust_coeffs(adj: base.BoolVector) -> base.FloatVector: def cluster_coeff_distbn( - pmu: "MomentumArray", radius: float, pseudo: bool = True + pmu: "MomentumArray", + radius: float, + pseudo: bool = True, + threads: int = 1, ) -> base.FloatVector: """A measure of clustering for a particle point-cloud. Transforms point-cloud into a graph, where node neighbourhood is determined by @@ -596,15 +599,19 @@ def cluster_coeff_distbn( pseudo : bool If True, will use pseudorapidity, rather than true rapidity. Default is True. + threads : int + Number of threads to use in the parallel portions of the + calculation. Returns ------- ndarray[float32] Clustering coefficients of the particles in the point-cloud. """ - adj = pmu.delta_R(pmu, pseudo) < radius + adj = pmu.delta_R(pmu, pseudo, threads) < radius np.fill_diagonal(adj, False) - return _clust_coeffs(adj) + with _thread_scope(threads): + return _clust_coeffs(adj) @ctx.contextmanager From 6fb4da0bf72a736bc5f9f0c0da06c07347823ff8 Mon Sep 17 00:00:00 2001 From: Jacan Chaplais Date: Tue, 3 Oct 2023 17:32:01 +0100 Subject: [PATCH 2/2] minor internal naming improvement --- graphicle/select.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/graphicle/select.py b/graphicle/select.py index 1d21292..131f007 100644 --- a/graphicle/select.py +++ b/graphicle/select.py @@ -11,11 +11,11 @@ import typing as ty import awkward as ak -import fastjet +import fastjet as fj import numpy as np import pandas as pd import scipy.optimize as opt -from scipy.sparse.csgraph import breadth_first_tree +from scipy.sparse.csgraph import breadth_first_tree as _breadth_first_tree import graphicle as gcl @@ -113,21 +113,22 @@ def fastjet_clusters( pmu_ak = ak.from_numpy(pmu_renamed) extra_param = tuple() if p_val == -1: - algo = fastjet.antikt_algorithm + algo = fj.antikt_algorithm elif p_val == 0: - algo = fastjet.cambridge_algorithm + algo = fj.cambridge_algorithm elif p_val == 1: - algo = fastjet.kt_algorithm + algo = fj.kt_algorithm else: - algo = fastjet.genkt_algorithm + algo = fj.genkt_algorithm extra_param = (p_val,) - jetdef = fastjet.JetDefinition(algo, radius, *extra_param) - sequence = fastjet.ClusterSequence(pmu_ak, jetdef) + jetdef = fj.JetDefinition(algo, radius, *extra_param) + sequence = fj.ClusterSequence(pmu_ak, jetdef) jets = sequence.inclusive_jets() jet_pmus = gcl.MomentumArray(jets.to_numpy()) + num_jets = len(jet_pmus) pt_descend_idxs = np.argsort(jet_pmus.pt)[::-1].tolist() jet_pmus = jet_pmus[pt_descend_idxs] - cuts = np.ones(len(pt_descend_idxs), dtype=np.bool_) + cuts = np.ones(num_jets, dtype=np.bool_) if pt_cut is not None: cuts = jet_pmus.pt > pt_cut if eta_cut is not None: @@ -263,7 +264,7 @@ def vertex_descendants(adj: gcl.AdjacencyList, vertex: int) -> gcl.MaskArray: return gcl.MaskArray(adj.edges["dst"] == vertex) sparse = adj._sparse_signed vertex = sparse.row[vertex_mask][0] - bft = breadth_first_tree(adj._sparse_csr, vertex) + bft = _breadth_first_tree(adj._sparse_csr, vertex) mask = np.isin(sparse.row, bft.indices) mask[sparse.row == vertex] = True # include edges directly from parent return gcl.MaskArray(mask)