diff --git a/docs/source/conf.py b/docs/source/conf.py index 2e76bff..1460ef2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -54,7 +54,6 @@ "sphinx.ext.autosummary", "sphinx.ext.napoleon", "sphinx.ext.intersphinx", - "sphinx_mdinclude", "sphinx_immaterial", "sphinx_immaterial.apidoc.python.apigen", "sphinx.ext.linkcode", diff --git a/graphicle/calculate.py b/graphicle/calculate.py index 9d62e70..4ba1073 100644 --- a/graphicle/calculate.py +++ b/graphicle/calculate.py @@ -415,7 +415,7 @@ def flow_trace( return traces -@nb.njit("float64[:](float64[:], float64[:], float64)") +@nb.njit("float64[:](float64[:], float64[:], float64)", cache=True) def _rapidity( energy: base.DoubleVector, z: base.DoubleVector, zero_tol: float ) -> base.DoubleVector: @@ -447,7 +447,7 @@ def _rapidity( return rap -@nb.vectorize("float64(float64, float64)") +@nb.vectorize("float64(float64, float64)", cache=True) def _root_diff_two_squares( x1: base.DoubleUfunc, x2: base.DoubleUfunc ) -> base.DoubleUfunc: @@ -484,6 +484,7 @@ def _root_diff_two_squares( @nb.njit( "float64[:, :](float64[:], float64[:], complex128[:], complex128[:])", parallel=True, + cache=True, ) def _delta_R( rapidity_1: base.DoubleVector, @@ -522,7 +523,7 @@ def _delta_R( return result -@nb.njit("float64[:, :](float64[:], complex128[:])", parallel=True) +@nb.njit("float64[:, :](float64[:], complex128[:])", parallel=True, cache=True) def _delta_R_symmetric( rapidity: base.DoubleVector, xy_pol: base.ComplexVector ) -> base.DoubleVector: @@ -559,7 +560,7 @@ def _delta_R_symmetric( return result -@nb.njit("float32[:](bool_[:, :])", parallel=True) +@nb.njit("float32[:](bool_[:, :])", parallel=True, cache=True) def _clust_coeffs(adj: base.BoolVector) -> base.FloatVector: num_nodes = adj.shape[0] coefs = np.empty(num_nodes, dtype=np.float32) @@ -652,3 +653,32 @@ def aggregate_momenta( pmus = map(fn.partial(op.getitem, pmu), cluster_masks) pmu_sums = map(fn.partial(np.sum, axis=0), pmus) return momentum_class(list(it.chain.from_iterable(pmu_sums))) + + +@nb.njit( + "float64[:, :](float64[:], float64[:], complex128[:], complex128[:])", + parallel=True, + cache=True, +) +def _assignment_cost( + rapidity_1: base.DoubleVector, + rapidity_2: base.DoubleVector, + xy_pol_1: base.ComplexVector, + xy_pol_2: base.ComplexVector, +) -> base.DoubleVector: + dist_matrix = _delta_R(rapidity_1, rapidity_2, xy_pol_1, xy_pol_2) + pt_2 = rapidity_2 # recycle memory buffer for transverse momenta + for pol_idx, pol_val in enumerate(xy_pol_2): + pt_2[pol_idx] = abs(pol_val) + var_pt_recip = 1.0 / np.var(pt_2) + num_partons = dist_matrix.shape[0] + for parton_idx in nb.prange(num_partons): + row = dist_matrix[parton_idx, :] + pt_1_val = abs(xy_pol_1[parton_idx]) + var_dR_recip = 1.0 / np.var(row) + for jet_idx, (dR_val, pt_2_val) in enumerate(zip(row, pt_2)): + dpt = pt_1_val - pt_2_val + dR_cost = math.expm1(-0.5 * var_dR_recip * dR_val * dR_val) + pt_cost = math.expm1(-0.5 * var_pt_recip * dpt * dpt) + row[jet_idx] = -(dR_cost + pt_cost) + return dist_matrix diff --git a/graphicle/select.py b/graphicle/select.py index 3661c64..4aeb71c 100644 --- a/graphicle/select.py +++ b/graphicle/select.py @@ -102,13 +102,36 @@ def fastjet_clusters( List containing masks over the input data for each jet clustering, in order of descending :math:`p_T`. + Raises + ------ + ValueError + When a negative value is passed to ``eta_cut``, ``pt_cut``, or + ``radius``. Additionally, when ``top_k`` is passed as either a + non-integer, or with a value less than one. + Notes ----- ``p_val`` set to ``-1`` gives **anti-kT**, ``0`` gives **Cambridge-Aachen**, and ``1`` gives **kT** clusterings. + + To prevent expensive repeated memory allocations, the underlying + masks are stored as a single contiguous array, where each row is the + data for the respective ``MaskArray`` in the output list. This may + cause undefined behaviour if you apply views on the underlying data + in a ``MaskArray`` without copying it. """ + if pt_cut is None: + pt_cut = 0.0 + elif pt_cut < 0.0: + raise ValueError("pt_cut must be non-negative.") + if radius < 0.0: + raise ValueError("radius must be non-negative.") + if (top_k is not None) and ((top_k < 1) or (not isinstance(top_k, int))): + raise ValueError( + "top_k must be an integer with a value greater than zero." + ) num_pcls = len(pmu) - pmu_renamed = pmu.data.copy()[["e", "x", "y", "z"]] + pmu_renamed = pmu.data[["e", "x", "y", "z"]].copy() pmu_renamed.dtype.names = "E", "px", "py", "pz" pmu_ak = ak.from_numpy(pmu_renamed) extra_param = tuple() @@ -121,30 +144,24 @@ def fastjet_clusters( else: algo = fj.genkt_algorithm extra_param = (p_val,) - jetdef = fj.JetDefinition(algo, radius, *extra_param) - sequence = fj.ClusterSequence(pmu_ak, jetdef) - jets = sequence.inclusive_jets() + jet_def = fj.JetDefinition(algo, radius, *extra_param) + sequence = fj.ClusterSequence(pmu_ak, jet_def) + jets = sequence.inclusive_jets(pt_cut) jet_pmus = gcl.MomentumArray(jets.to_numpy()) - num_jets = len(jet_pmus) pt_descend_idxs = np.argsort(jet_pmus.pt)[::-1].tolist() - jet_pmus = jet_pmus[pt_descend_idxs] - cuts = np.ones(num_jets, dtype=np.bool_) - if pt_cut is not None: - cuts = jet_pmus.pt > pt_cut + jet_idxs_ = sequence.constituent_index(pt_cut)[pt_descend_idxs] if eta_cut is not None: - cuts = np.logical_and(cuts, np.abs(jet_pmus.eta) < eta_cut) - jet_idxs = sequence.constituent_index().to_list() - jet_idxs = op.itemgetter(*pt_descend_idxs)(jet_idxs) - jet_idxs = it.compress(jet_idxs, cuts) - if top_k is not None: - jet_idxs = it.islice(jet_idxs, top_k) - mask_empty = gcl.MaskArray(np.zeros(num_pcls, dtype=" ty.List[int]: """Assigns four-momenta elements in ``candidate`` to the nearest four-momenta elements in ``focus``. Elements in ``candidate`` are @@ -1155,6 +1174,9 @@ def arg_closest( .. versionadded:: 0.2.14 + .. versionchanged:: 0.3.8 + Modified the distance metric to include transverse momentum. + Parameters ---------- focus : MomentumArray @@ -1163,6 +1185,9 @@ def arg_closest( candidate : MomentumArray Four-momenta of candidate objects to draw from until ``focus`` objects have each received an assignment. + num_threads : int + Number of threads to parallelise the cost matrix computation + over. Default is 1. Returns ------- @@ -1196,7 +1221,15 @@ def arg_closest( Systems*, 52(4):1679-1696, August 2016, :doi:`10.1109/TAES.2016.140952` """ - _, idxs = opt.linear_sum_assignment(focus.delta_R(candidate, pseudo=False)) + with gcl.calculate._thread_scope(num_threads): + cost_matrix = gcl.calculate._assignment_cost( + focus.rapidity, + candidate.rapidity, + focus._xy_pol, + candidate._xy_pol, + ) + del candidate.rapidity # inplace operation above invalidates cache + _, idxs = opt.linear_sum_assignment(cost_matrix) return idxs.tolist() @@ -1343,6 +1376,11 @@ def monte_carlo_tag( if clustered_pmu is None: clustered_pmu = particles.pmu[particles.final] ref_length = "particles.final" + if len(cluster_masks) < len(hard_pmu): + raise ValueError( + f"shape mismatch: only {len(cluster_masks)} clusters " + f"passed to tag {len(hard_pmu)} partons." + ) if len(clustered_pmu) != len(cluster_masks[0]): raise ValueError( "shape mismatch: length of elements in cluster_masks must be the "