Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallel computation for clustering coefficients #162

Merged
merged 2 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions graphicle/calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,12 +559,12 @@ def _delta_R_symmetric(
return result


@nb.njit("float32[:](bool_[:, :])")
@nb.njit("float32[:](bool_[:, :])", parallel=True)
def _clust_coeffs(adj: base.BoolVector) -> base.FloatVector:
num_nodes = adj.shape[0]
coefs = np.empty(num_nodes, dtype=np.float32)
for node_idx, row in enumerate(adj):
neibs = np.flatnonzero(row)
for node_idx in nb.prange(num_nodes):
neibs = np.flatnonzero(adj[node_idx, :])
num_neibs = neibs.shape[0]
if num_neibs < 2:
coefs[node_idx] = 0.0
Expand All @@ -575,7 +575,10 @@ def _clust_coeffs(adj: base.BoolVector) -> base.FloatVector:


def cluster_coeff_distbn(
pmu: "MomentumArray", radius: float, pseudo: bool = True
pmu: "MomentumArray",
radius: float,
pseudo: bool = True,
threads: int = 1,
) -> base.FloatVector:
"""A measure of clustering for a particle point-cloud. Transforms
point-cloud into a graph, where node neighbourhood is determined by
Expand All @@ -596,15 +599,19 @@ def cluster_coeff_distbn(
pseudo : bool
If True, will use pseudorapidity, rather than true rapidity.
Default is True.
threads : int
Number of threads to use in the parallel portions of the
calculation.

Returns
-------
ndarray[float32]
Clustering coefficients of the particles in the point-cloud.
"""
adj = pmu.delta_R(pmu, pseudo) < radius
adj = pmu.delta_R(pmu, pseudo, threads) < radius
np.fill_diagonal(adj, False)
return _clust_coeffs(adj)
with _thread_scope(threads):
return _clust_coeffs(adj)


@ctx.contextmanager
Expand Down
21 changes: 11 additions & 10 deletions graphicle/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
import typing as ty

import awkward as ak
import fastjet
import fastjet as fj
import numpy as np
import pandas as pd
import scipy.optimize as opt
from scipy.sparse.csgraph import breadth_first_tree
from scipy.sparse.csgraph import breadth_first_tree as _breadth_first_tree

import graphicle as gcl

Expand Down Expand Up @@ -113,21 +113,22 @@ def fastjet_clusters(
pmu_ak = ak.from_numpy(pmu_renamed)
extra_param = tuple()
if p_val == -1:
algo = fastjet.antikt_algorithm
algo = fj.antikt_algorithm
elif p_val == 0:
algo = fastjet.cambridge_algorithm
algo = fj.cambridge_algorithm
elif p_val == 1:
algo = fastjet.kt_algorithm
algo = fj.kt_algorithm
else:
algo = fastjet.genkt_algorithm
algo = fj.genkt_algorithm
extra_param = (p_val,)
jetdef = fastjet.JetDefinition(algo, radius, *extra_param)
sequence = fastjet.ClusterSequence(pmu_ak, jetdef)
jetdef = fj.JetDefinition(algo, radius, *extra_param)
sequence = fj.ClusterSequence(pmu_ak, jetdef)
jets = sequence.inclusive_jets()
jet_pmus = gcl.MomentumArray(jets.to_numpy())
num_jets = len(jet_pmus)
pt_descend_idxs = np.argsort(jet_pmus.pt)[::-1].tolist()
jet_pmus = jet_pmus[pt_descend_idxs]
cuts = np.ones(len(pt_descend_idxs), dtype=np.bool_)
cuts = np.ones(num_jets, dtype=np.bool_)
if pt_cut is not None:
cuts = jet_pmus.pt > pt_cut
if eta_cut is not None:
Expand Down Expand Up @@ -263,7 +264,7 @@ def vertex_descendants(adj: gcl.AdjacencyList, vertex: int) -> gcl.MaskArray:
return gcl.MaskArray(adj.edges["dst"] == vertex)
sparse = adj._sparse_signed
vertex = sparse.row[vertex_mask][0]
bft = breadth_first_tree(adj._sparse_csr, vertex)
bft = _breadth_first_tree(adj._sparse_csr, vertex)
mask = np.isin(sparse.row, bft.indices)
mask[sparse.row == vertex] = True # include edges directly from parent
return gcl.MaskArray(mask)
Expand Down
Loading