Skip to content

Commit

Permalink
Merge pull request #46136 from jsamudio/dev_pfrhfAlloc_14_1_X
Browse files Browse the repository at this point in the history
[14_1_X] Add runtime allocation of PF rechit fraction SoA
  • Loading branch information
cmsbuild authored Oct 5, 2024
2 parents ffd3756 + 8ccf6b5 commit 4bc3e62
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 31 deletions.
11 changes: 11 additions & 0 deletions HLTrigger/Configuration/python/customizeHLTforCMSSW.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,16 @@ def checkHLTfor43774(process):

return process

def customizeHLTfor46135(process):
    """Drop the obsolete ``pfRecHitFractionAllocation`` parameter.

    Visits every PFClusterSoAProducer module found in ``process`` under either
    of its two type spellings (``PFClusterSoAProducer@alpaka`` and
    ``alpaka_serial_sync::PFClusterSoAProducer``) and deletes the parameter
    from each module that still carries it. Modules without the parameter are
    left untouched.

    Returns the same ``process`` object that was passed in.
    """
    obsolete_param = 'pfRecHitFractionAllocation'
    # Same lookup order as before: all "@alpaka" modules first, then the
    # explicit serial-sync ones.
    module_types = (
        "PFClusterSoAProducer@alpaka",
        "alpaka_serial_sync::PFClusterSoAProducer",
    )
    for module_type in module_types:
        for module in producers_by_type(process, module_type):
            if not hasattr(module, obsolete_param):
                continue
            delattr(module, obsolete_param)
    return process

# CMSSW version specific customizations
def customizeHLTforCMSSW(process, menuType="GRun"):

Expand All @@ -270,5 +280,6 @@ def customizeHLTforCMSSW(process, menuType="GRun"):
# process = customiseFor12718(process)

process = checkHLTfor43774(process)
process = customizeHLTfor46135(process)

return process
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,83 @@
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "FWCore/Utilities/interface/InputTag.h"
#include "FWCore/Utilities/interface/StreamID.h"
#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h"
#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h"
#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h"
#include "RecoParticleFlow/PFClusterProducer/interface/PFCPositionCalculatorBase.h"
#include "RecoParticleFlow/PFClusterProducer/interface/alpaka/PFClusterParamsDeviceCollection.h"
#include "RecoParticleFlow/PFClusterProducer/plugins/alpaka/PFClusterSoAProducerKernel.h"
#include "RecoParticleFlow/PFRecHitProducer/interface/PFRecHitTopologyRecord.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {
class PFClusterSoAProducer : public stream::EDProducer<> {
class PFClusterSoAProducer : public stream::SynchronizingEDProducer<> {
public:
PFClusterSoAProducer(edm::ParameterSet const& config)
: pfClusParamsToken(esConsumes(config.getParameter<edm::ESInputTag>("pfClusterParams"))),
topologyToken_(esConsumes(config.getParameter<edm::ESInputTag>("topology"))),
inputPFRecHitSoA_Token_{consumes(config.getParameter<edm::InputTag>("pfRecHits"))},
outputPFClusterSoA_Token_{produces()},
outputPFRHFractionSoA_Token_{produces()},
synchronise_(config.getParameter<bool>("synchronise")),
pfRecHitFractionAllocation_(config.getParameter<int>("pfRecHitFractionAllocation")) {}
numRHF_{cms::alpakatools::make_host_buffer<uint32_t, Platform>()},
synchronise_(config.getParameter<bool>("synchronise")) {}

void produce(device::Event& event, device::EventSetup const& setup) override {
void acquire(device::Event const& event, device::EventSetup const& setup) override {
const reco::PFClusterParamsDeviceCollection& params = setup.getData(pfClusParamsToken);
const reco::PFRecHitHCALTopologyDeviceCollection& topology = setup.getData(topologyToken_);
const reco::PFRecHitHostCollection& pfRecHits = event.get(inputPFRecHitSoA_Token_);
int nRH = 0;
if (pfRecHits->metadata().size() != 0)
nRH = pfRecHits->size();

reco::PFClusteringVarsDeviceCollection pfClusteringVars{nRH, event.queue()};
reco::PFClusteringEdgeVarsDeviceCollection pfClusteringEdgeVars{(nRH * 8), event.queue()};
reco::PFClusterDeviceCollection pfClusters{nRH, event.queue()};
reco::PFRecHitFractionDeviceCollection pfrhFractions{nRH * pfRecHitFractionAllocation_, event.queue()};
pfClusteringVars_.emplace(nRH, event.queue());
pfClusteringEdgeVars_.emplace(nRH * 8, event.queue());
pfClusters_.emplace(nRH, event.queue());

*numRHF_ = 0;

if (nRH != 0) {
PFClusterProducerKernel kernel(event.queue(), pfRecHits);
kernel.seedTopoAndContract(event.queue(),
params,
topology,
*pfClusteringVars_,
*pfClusteringEdgeVars_,
pfRecHits,
*pfClusters_,
numRHF_.data());
}
}

void produce(device::Event& event, device::EventSetup const& setup) override {
const reco::PFClusterParamsDeviceCollection& params = setup.getData(pfClusParamsToken);
const reco::PFRecHitHCALTopologyDeviceCollection& topology = setup.getData(topologyToken_);
const reco::PFRecHitHostCollection& pfRecHits = event.get(inputPFRecHitSoA_Token_);
int nRH = 0;

std::optional<reco::PFRecHitFractionDeviceCollection> pfrhFractions;

if (pfRecHits->metadata().size() != 0)
nRH = pfRecHits->size();

if (nRH != 0) {
pfrhFractions.emplace(*numRHF_.data(), event.queue());
PFClusterProducerKernel kernel(event.queue(), pfRecHits);
kernel.execute(event.queue(),
kernel.cluster(event.queue(),
params,
topology,
pfClusteringVars,
pfClusteringEdgeVars,
*pfClusteringVars_,
*pfClusteringEdgeVars_,
pfRecHits,
pfClusters,
pfrhFractions);
*pfClusters_,
*pfrhFractions);
} else {
pfrhFractions.emplace(0, event.queue());
}

if (synchronise_)
alpaka::wait(event.queue());

event.emplace(outputPFClusterSoA_Token_, std::move(pfClusters));
event.emplace(outputPFRHFractionSoA_Token_, std::move(pfrhFractions));
event.emplace(outputPFClusterSoA_Token_, std::move(*pfClusters_));
event.emplace(outputPFRHFractionSoA_Token_, std::move(*pfrhFractions));
}

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
Expand All @@ -65,7 +93,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
desc.add<edm::ESInputTag>("pfClusterParams", edm::ESInputTag(""));
desc.add<edm::ESInputTag>("topology", edm::ESInputTag(""));
desc.add<bool>("synchronise", false);
desc.add<int>("pfRecHitFractionAllocation", 120);
descriptions.addWithDefaultLabel(desc);
}

Expand All @@ -75,8 +102,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
const edm::EDGetTokenT<reco::PFRecHitHostCollection> inputPFRecHitSoA_Token_;
const device::EDPutToken<reco::PFClusterDeviceCollection> outputPFClusterSoA_Token_;
const device::EDPutToken<reco::PFRecHitFractionDeviceCollection> outputPFRHFractionSoA_Token_;
cms::alpakatools::host_buffer<uint32_t> numRHF_;
std::optional<reco::PFClusteringVarsDeviceCollection> pfClusteringVars_;
std::optional<reco::PFClusteringEdgeVarsDeviceCollection> pfClusteringEdgeVars_;
std::optional<reco::PFClusterDeviceCollection> pfClusters_;
const bool synchronise_;
const int pfRecHitFractionAllocation_;
};

} // namespace ALPAKA_ACCELERATOR_NAMESPACE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1090,7 +1090,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
const reco::PFRecHitHCALTopologyDeviceCollection::ConstView topology,
const reco::PFRecHitHostCollection::ConstView pfRecHits,
reco::PFClusterDeviceCollection::View clusterView,
reco::PFRecHitFractionDeviceCollection::View fracView,
uint32_t* __restrict__ nSeeds) const {
const int nRH = pfRecHits.size();

Expand Down Expand Up @@ -1199,7 +1198,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
const reco::PFRecHitHostCollection::ConstView pfRecHits,
reco::PFClusteringVarsDeviceCollection::View pfClusteringVars,
reco::PFClusterDeviceCollection::View clusterView,
uint32_t* __restrict__ nSeeds) const {
uint32_t* __restrict__ nSeeds,
uint32_t* __restrict__ nRHF) const {
const int nRH = pfRecHits.size();
int& totalSeedOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
int& totalSeedFracOffset = alpaka::declareSharedVar<int, __COUNTER__>(acc);
Expand Down Expand Up @@ -1302,6 +1302,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
pfClusteringVars.pcrhFracSize() = totalSeedFracOffset;
pfClusteringVars.nRHFracs() = totalSeedFracOffset;
clusterView.nRHFracs() = totalSeedFracOffset;
*nRHF = totalSeedFracOffset;
clusterView.nSeeds() = *nSeeds;
clusterView.nTopos() = pfClusteringVars.nTopos();

Expand Down Expand Up @@ -1467,14 +1468,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
alpaka::memset(queue, nSeeds, 0x00); // Reset nSeeds
}

void PFClusterProducerKernel::execute(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
reco::PFRecHitFractionDeviceCollection& pfrhFractions) {
void PFClusterProducerKernel::seedTopoAndContract(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
uint32_t* __restrict__ nRHF) {
const int nRH = pfRecHits->size();
const int threadsPerBlock = 256;
const int blocks = divide_up_by(nRH, threadsPerBlock);
Expand All @@ -1488,7 +1489,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
topology.view(),
pfRecHits.view(),
pfClusters.view(),
pfrhFractions.view(),
nSeeds.data());
// prepareTopoInputs
alpaka::exec<Acc1D>(queue,
Expand Down Expand Up @@ -1524,7 +1524,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
pfRecHits.view(),
pfClusteringVars.view(),
pfClusters.view(),
nSeeds.data());
nSeeds.data(),
nRHF);
}

void PFClusterProducerKernel::cluster(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
reco::PFRecHitFractionDeviceCollection& pfrhFractions) {
const int nRH = pfRecHits->size();

// fillRhfIndex
alpaka::exec<Acc2D>(queue,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
public:
PFClusterProducerKernel(Queue& queue, const reco::PFRecHitHostCollection& pfRecHits);

void execute(Queue& queue,
void seedTopoAndContract(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
reco::PFClusteringEdgeVarsDeviceCollection& pfClusteringEdgeVars,
const reco::PFRecHitHostCollection& pfRecHits,
reco::PFClusterDeviceCollection& pfClusters,
uint32_t* __restrict__ nRHF);

void cluster(Queue& queue,
const reco::PFClusterParamsDeviceCollection& params,
const reco::PFRecHitHCALTopologyDeviceCollection& topology,
reco::PFClusteringVarsDeviceCollection& pfClusteringVars,
Expand Down

0 comments on commit 4bc3e62

Please sign in to comment.