Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Detectors/ITSMFT/ITS/tracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,5 @@ o2_target_root_dictionary(ITStracking
if(CUDA_ENABLED OR HIP_ENABLED)
add_subdirectory(GPU)
endif()

add_subdirectory(test)
142 changes: 77 additions & 65 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Large diffs are not rendered by default.

14 changes: 8 additions & 6 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ void TrackerTraitsGPU<nLayers>::initialiseTimeFrame(const int iteration)
mTimeFrameGPU->loadVertices(iteration);
mTimeFrameGPU->loadIndexTableUtils(iteration);
mTimeFrameGPU->loadMultiplicityCutMask(iteration);
// pinned on host
mTimeFrameGPU->createUsedClustersDeviceArray(iteration);
mTimeFrameGPU->createClustersDeviceArray(iteration);
mTimeFrameGPU->createUnsortedClustersDeviceArray(iteration);
mTimeFrameGPU->createClustersIndexTablesArray(iteration);
mTimeFrameGPU->createTrackingFrameInfoDeviceArray(iteration);
mTimeFrameGPU->createROFrameClustersDeviceArray(iteration);
// device array
mTimeFrameGPU->createTrackletsLUTDeviceArray(iteration);
mTimeFrameGPU->createTrackletsBuffersArray(iteration);
mTimeFrameGPU->createCellsBuffersArray(iteration);
Expand Down Expand Up @@ -106,7 +108,7 @@ void TrackerTraitsGPU<nLayers>::computeLayerTracklets(const int iteration, int i
mTimeFrameGPU->getPositionResolutions(),
this->mTrkParams[iteration].LayerRadii,
mTimeFrameGPU->getMSangles(),
mTimeFrameGPU->getExternalAllocator(),
mTimeFrameGPU->getExternalDeviceAllocator(),
conf.nBlocksLayerTracklets[iteration],
conf.nThreadsLayerTracklets[iteration],
mTimeFrameGPU->getStreams());
Expand Down Expand Up @@ -144,7 +146,7 @@ void TrackerTraitsGPU<nLayers>::computeLayerTracklets(const int iteration, int i
mTimeFrameGPU->getPositionResolutions(),
this->mTrkParams[iteration].LayerRadii,
mTimeFrameGPU->getMSangles(),
mTimeFrameGPU->getExternalAllocator(),
mTimeFrameGPU->getExternalDeviceAllocator(),
conf.nBlocksLayerTracklets[iteration],
conf.nThreadsLayerTracklets[iteration],
mTimeFrameGPU->getStreams());
Expand Down Expand Up @@ -195,7 +197,7 @@ void TrackerTraitsGPU<nLayers>::computeLayerCells(const int iteration)
this->mTrkParams[iteration].MaxChi2ClusterAttachment,
this->mTrkParams[iteration].CellDeltaTanLambdaSigma,
this->mTrkParams[iteration].NSigmaCut,
mTimeFrameGPU->getExternalAllocator(),
mTimeFrameGPU->getExternalDeviceAllocator(),
conf.nBlocksLayerCells[iteration],
conf.nThreadsLayerCells[iteration],
mTimeFrameGPU->getStreams());
Expand Down Expand Up @@ -251,7 +253,7 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighbours(const int iteration)
currentLayerCellsNum,
nextLayerCellsNum,
1e2,
mTimeFrameGPU->getExternalAllocator(),
mTimeFrameGPU->getExternalDeviceAllocator(),
conf.nBlocksFindNeighbours[iteration],
conf.nThreadsFindNeighbours[iteration],
mTimeFrameGPU->getStream(iLayer));
Expand Down Expand Up @@ -279,7 +281,7 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighbours(const int iteration)
mTimeFrameGPU->getDeviceNeighbours(iLayer),
mTimeFrameGPU->getArrayNNeighbours()[iLayer],
mTimeFrameGPU->getStream(iLayer),
mTimeFrameGPU->getExternalAllocator());
mTimeFrameGPU->getExternalDeviceAllocator());
}
mTimeFrameGPU->syncStreams(false);
}
Expand Down Expand Up @@ -310,7 +312,7 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
this->mTrkParams[0].MaxChi2NDF,
mTimeFrameGPU->getDevicePropagator(),
this->mTrkParams[0].CorrType,
mTimeFrameGPU->getExternalAllocator(),
mTimeFrameGPU->getExternalDeviceAllocator(),
conf.nBlocksProcessNeighbours[iteration],
conf.nThreadsProcessNeighbours[iteration]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <new>
#include <vector>

#include "ITStracking/ExternalAllocator.h"

#include "GPUCommonLogger.h"

namespace o2::its
Expand Down Expand Up @@ -56,6 +58,7 @@ class BoundedMemoryResource final : public std::pmr::memory_resource

BoundedMemoryResource(size_t maxBytes = std::numeric_limits<size_t>::max(), std::pmr::memory_resource* upstream = std::pmr::get_default_resource())
: mMaxMemory(maxBytes), mUpstream(upstream) {}
BoundedMemoryResource(ExternalAllocator* alloc) : mAdaptor(std::make_unique<ExternalAllocatorAdaptor>(alloc)), mUpstream(mAdaptor.get()) {}

void* do_allocate(size_t bytes, size_t alignment) final
{
Expand All @@ -69,7 +72,14 @@ class BoundedMemoryResource final : public std::pmr::memory_resource
} while (!mUsedMemory.compare_exchange_weak(current_used, new_used,
std::memory_order_acq_rel,
std::memory_order_relaxed));
return mUpstream->allocate(bytes, alignment);
void* p{nullptr};
try {
p = mUpstream->allocate(bytes, alignment);
} catch (...) {
mUsedMemory.fetch_sub(bytes, std::memory_order_relaxed);
throw;
}
return p;
}

void do_deallocate(void* p, size_t bytes, size_t alignment) final
Expand All @@ -87,11 +97,12 @@ class BoundedMemoryResource final : public std::pmr::memory_resource
size_t getMaxMemory() const noexcept { return mMaxMemory; }
void setMaxMemory(size_t max)
{
if (mUsedMemory > max) {
size_t used = mUsedMemory.load(std::memory_order_acquire);
if (used > max) {
++mCountThrow;
throw MemoryLimitExceeded(0, mUsedMemory, max);
throw MemoryLimitExceeded(0, used, max);
}
mMaxMemory = max;
mMaxMemory.store(max, std::memory_order_release);
}

void print() const
Expand All @@ -106,76 +117,74 @@ class BoundedMemoryResource final : public std::pmr::memory_resource
}

private:
size_t mMaxMemory{std::numeric_limits<size_t>::max()};
std::atomic<size_t> mMaxMemory{std::numeric_limits<size_t>::max()};
std::atomic<size_t> mCountThrow{0};
std::atomic<size_t> mUsedMemory{0};
std::pmr::memory_resource* mUpstream;
std::unique_ptr<ExternalAllocatorAdaptor> mAdaptor{nullptr};
std::pmr::memory_resource* mUpstream{nullptr};
};

template <typename T>
using bounded_vector = std::pmr::vector<T>;

/// Empties a std::vector and gives up its buffer.
/// The vector is swapped with a default-constructed temporary, so the old
/// buffer ends up in the temporary and is released when that temporary is
/// destroyed at the end of the statement.
template <typename T>
void deepVectorClear(std::vector<T>& vec)
inline void deepVectorClear(std::vector<T>& vec)
{
std::vector<T>().swap(vec);
}

/// Resets a bounded_vector by destroying it in place and placement-constructing
/// a new, empty one in the same storage.
/// The optional resource argument selects the memory resource of the rebuilt
/// vector; when it is null the resource the vector was already using is kept.
template <typename T>
inline void deepVectorClear(bounded_vector<T>& vec, BoundedMemoryResource* bmr = nullptr)
inline void deepVectorClear(bounded_vector<T>& vec, std::pmr::memory_resource* mr = nullptr)
{
// Resolve the target resource before the destructor runs, i.e. while the
// vector's allocator can still be queried.
std::pmr::memory_resource* tmr = (mr != nullptr) ? mr : vec.get_allocator().resource();
vec.~bounded_vector<T>();
if (bmr == nullptr) {
auto alloc = vec.get_allocator().resource();
new (&vec) bounded_vector<T>(alloc);
} else {
new (&vec) bounded_vector<T>(bmr);
}
new (&vec) bounded_vector<T>(std::pmr::polymorphic_allocator<T>{tmr});
}

/// Applies the single-vector deepVectorClear to every element of `vec`,
/// forwarding the optional resource argument unchanged.
/// Only the inner vectors are reset; the outer vector is not resized here.
template <typename T>
void deepVectorClear(std::vector<bounded_vector<T>>& vec, BoundedMemoryResource* bmr = nullptr)
inline void deepVectorClear(std::vector<bounded_vector<T>>& vec, std::pmr::memory_resource* mr = nullptr)
{
for (auto& v : vec) {
deepVectorClear(v, bmr);
deepVectorClear(v, mr);
}
}

/// Applies the single-vector deepVectorClear to each of the S elements of
/// `arr`, forwarding the optional resource argument unchanged.
template <typename T, size_t S>
void deepVectorClear(std::array<bounded_vector<T>, S>& arr, BoundedMemoryResource* bmr = nullptr)
inline void deepVectorClear(std::array<bounded_vector<T>, S>& arr, std::pmr::memory_resource* mr = nullptr)
{
for (size_t i{0}; i < S; ++i) {
deepVectorClear(arr[i], bmr);
deepVectorClear(arr[i], mr);
}
}

/// Replaces `vec` with a freshly constructed bounded_vector holding the
/// requested number of copies of `def`.
/// The old vector is destroyed in place and the new one is placement-constructed
/// in the same storage. A null resource argument means "keep the resource the
/// vector was already using".
template <typename T>
void clearResizeBoundedVector(bounded_vector<T>& vec, size_t size, BoundedMemoryResource* bmr, T def = T())
inline void clearResizeBoundedVector(bounded_vector<T>& vec, size_t sz, std::pmr::memory_resource* mr = nullptr, T def = T())
{
// Resolve the target resource before the destructor runs, i.e. while the
// vector's allocator can still be queried.
std::pmr::memory_resource* tmr = (mr != nullptr) ? mr : vec.get_allocator().resource();
vec.~bounded_vector<T>();
new (&vec) bounded_vector<T>(size, def, bmr);
new (&vec) bounded_vector<T>(sz, def, std::pmr::polymorphic_allocator<T>{tmr});
}

/// Rebuilds `vec` as `size` empty bounded_vectors that all use the given
/// memory resource.
/// The outer vector is cleared, capacity for `size` elements is reserved, and
/// each inner vector is emplaced from the resource.
/// NOTE(review): unlike the single-vector overload, there is no default and no
/// null fallback for the resource here — callers presumably must pass a valid
/// pointer; confirm at the call sites.
template <typename T>
void clearResizeBoundedVector(std::vector<bounded_vector<T>>& vec, size_t size, BoundedMemoryResource* bmr)
inline void clearResizeBoundedVector(std::vector<bounded_vector<T>>& vec, size_t size, std::pmr::memory_resource* mr)
{
vec.clear();
vec.reserve(size);
for (size_t i{0}; i < size; ++i) {
vec.emplace_back(bmr);
for (size_t i = 0; i < size; ++i) {
vec.emplace_back(std::pmr::polymorphic_allocator<bounded_vector<T>>{mr});
}
}

/// Calls the single-vector clearResizeBoundedVector on each of the S elements
/// of `arr`, so every element ends up holding `size` copies of `def`.
/// The resource argument is forwarded unchanged to each call.
template <typename T, size_t S>
void clearResizeBoundedArray(std::array<bounded_vector<T>, S>& arr, size_t size, BoundedMemoryResource* bmr, T def = T())
inline void clearResizeBoundedArray(std::array<bounded_vector<T>, S>& arr, size_t size, std::pmr::memory_resource* mr = nullptr, T def = T())
{
for (size_t i{0}; i < S; ++i) {
clearResizeBoundedVector(arr[i], size, bmr, def);
clearResizeBoundedVector(arr[i], size, mr, def);
}
}

template <typename T>
std::vector<T> toSTDVector(const bounded_vector<T>& b)
inline std::vector<T> toSTDVector(const bounded_vector<T>& b)
{
std::vector<T> t(b.size());
std::copy(b.cbegin(), b.cend(), t.begin());
Expand Down
9 changes: 5 additions & 4 deletions Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
#ifndef TRACKINGITSU_INCLUDE_CACLUSTER_H_
#define TRACKINGITSU_INCLUDE_CACLUSTER_H_

#include <array>
#include "ITStracking/Constants.h"
#include "GPUCommonRtypes.h"
#include "GPUCommonArray.h"

namespace o2::its
{
Expand Down Expand Up @@ -47,8 +48,8 @@ struct Cluster final {
float zCoordinate{-999.f};
float phi{-999.f};
float radius{-999.f};
int clusterId{-1};
int indexTableBinIndex{-1};
int clusterId{constants::UnusedIndex};
int indexTableBinIndex{constants::UnusedIndex};

ClassDefNV(Cluster, 1);
};
Expand All @@ -70,7 +71,7 @@ struct TrackingFrameInfo final {
float zCoordinate{-999.f};
float xTrackingFrame{-999.f};
float alphaTrackingFrame{-999.f};
std::array<float, 2> positionTrackingFrame = {-1., -1.};
std::array<float, 2> positionTrackingFrame = {constants::UnusedIndex, constants::UnusedIndex};
std::array<float, 3> covarianceTrackingFrame = {999., 999., 999.};

ClassDefNV(TrackingFrameInfo, 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#ifndef TRACKINGITSU_INCLUDE_EXTERNALALLOCATOR_H_
#define TRACKINGITSU_INCLUDE_EXTERNALALLOCATOR_H_

#include <memory_resource>

namespace o2::its
{

Expand All @@ -25,6 +27,36 @@ class ExternalAllocator
virtual void* allocate(size_t) = 0;
virtual void deallocate(char*, size_t) = 0;
};

/// Adapts an ExternalAllocator to the std::pmr::memory_resource interface so
/// that pmr containers can obtain their storage from it.
///
/// The adaptor only stores the pointer it is given; it never deletes the
/// allocator, so the caller must keep the allocator alive for as long as any
/// memory obtained through this resource is in use.
class ExternalAllocatorAdaptor final : public std::pmr::memory_resource
{
 public:
  /// @param alloc allocator that will serve all requests (not owned).
  explicit ExternalAllocatorAdaptor(ExternalAllocator* alloc) : mAlloc(alloc) {}

 protected:
  /// Requests `bytes` from the wrapped allocator.
  /// The alignment argument is not forwarded: the wrapped allocate() call only
  /// takes a size, so the alignment of the result is whatever the external
  /// allocator provides.
  /// @throws std::bad_alloc if no allocator is attached or if the allocator
  ///         returns a null pointer.
  void* do_allocate(size_t bytes, size_t /*alignment*/) override
  {
    // Fail with the exception pmr callers expect instead of dereferencing a
    // null allocator pointer.
    if (mAlloc == nullptr) {
      throw std::bad_alloc();
    }
    void* p = mAlloc->allocate(bytes);
    if (p == nullptr) {
      throw std::bad_alloc();
    }
    return p;
  }

  /// Hands `p` (of size `bytes`) back to the wrapped allocator.
  /// The alignment argument is ignored, mirroring do_allocate.
  void do_deallocate(void* p, size_t bytes, size_t /*alignment*/) override
  {
    // Without an allocator nothing can have been handed out by do_allocate,
    // so there is nothing to return.
    if (mAlloc == nullptr) {
      return;
    }
    mAlloc->deallocate(static_cast<char*>(p), bytes);
  }

  /// Two adaptors are interchangeable only if they are the same object.
  bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override
  {
    return this == &other;
  }

 private:
  ExternalAllocator* mAlloc; ///< non-owning pointer to the wrapped allocator
};

} // namespace o2::its

#endif
46 changes: 21 additions & 25 deletions Detectors/ITSMFT/ITS/tracking/include/ITStracking/TimeFrame.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ struct TimeFrame {
using CellSeedN = CellSeed<nLayers>;
friend class gpu::TimeFrameGPU<nLayers>;

TimeFrame();
virtual ~TimeFrame();
TimeFrame() = default;
virtual ~TimeFrame() = default;

const Vertex& getPrimaryVertex(const int ivtx) const { return mPrimaryVertices[ivtx]; }
gsl::span<const Vertex> getPrimaryVertices(int rofId) const;
Expand All @@ -95,7 +95,7 @@ struct TimeFrame {
gsl::span<const unsigned char>::iterator& pattIt,
const itsmft::TopologyDictionary* dict,
const dataformats::MCTruthContainer<MCCompLabel>* mcLabels = nullptr);
void resetROFrameData();
void resetROFrameData(size_t nROFs);

int getTotalClusters() const;
auto& getTotVertIteration() { return mTotVertPerIteration; }
Expand Down Expand Up @@ -188,7 +188,7 @@ struct TimeFrame {
auto getNumberOfUsedExtendedClusters() const { return mNExtendedUsedClusters; }

/// memory management
void setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool);
void setMemoryPool(std::shared_ptr<BoundedMemoryResource> pool);
auto& getMemoryPool() const noexcept { return mMemoryPool; }
bool checkMemory(unsigned long max) { return getArtefactsMemory() < max; }
unsigned long getArtefactsMemory() const;
Expand Down Expand Up @@ -233,33 +233,33 @@ struct TimeFrame {
void setBz(float bz) { mBz = bz; }
float getBz() const { return mBz; }

void setExternalAllocator(ExternalAllocator* allocator)
/// State if memory will be externally managed.
// device
ExternalAllocator* mExtDeviceAllocator{nullptr};
void setExternalDeviceAllocator(ExternalAllocator* allocator) { mExtDeviceAllocator = allocator; }
ExternalAllocator* getExternalDeviceAllocator() { return mExtDeviceAllocator; }
bool hasExternalDeviceAllocator() const noexcept { return mExtDeviceAllocator != nullptr; }
// host
ExternalAllocator* mExtHostAllocator{nullptr};
void setExternalHostAllocator(ExternalAllocator* allocator)
{
if (isGPU()) {
LOGP(debug, "Setting timeFrame allocator to external");
mAllocator = allocator;
} else {
LOGP(fatal, "External allocator is currently only supported for GPU");
}
mExtHostAllocator = allocator;
mExtMemoryPool = std::make_shared<BoundedMemoryResource>(mExtHostAllocator);
}

ExternalAllocator* getExternalAllocator() { return mAllocator; }

virtual void setDevicePropagator(const o2::base::PropagatorImpl<float>*)
{
return;
};
ExternalAllocator* getExternalHostAllocator() { return mExtHostAllocator; }
bool hasExternalHostAllocator() const noexcept { return mExtHostAllocator != nullptr; }
std::shared_ptr<BoundedMemoryResource> mExtMemoryPool;
std::pmr::memory_resource* getMaybeExternalHostResource(bool forceHost = false) { return (hasExternalHostAllocator() && !forceHost) ? mExtMemoryPool.get() : mMemoryPool.get(); }
// Propagator
const o2::base::PropagatorImpl<float>* getDevicePropagator() const { return mPropagatorDevice; }
virtual void setDevicePropagator(const o2::base::PropagatorImpl<float>*) {};

template <typename... T>
void addClusterToLayer(int layer, T&&... args);
template <typename... T>
void addTrackingFrameInfoToLayer(int layer, T&&... args);
void addClusterExternalIndexToLayer(int layer, const int idx) { mClusterExternalIndices[layer].push_back(idx); }

void resetVectors();
void resetTracklets();

/// Debug and printing
void checkTrackletLUTs();
void printROFoffsets();
Expand Down Expand Up @@ -290,10 +290,6 @@ struct TimeFrame {
bounded_vector<int> mROFramesPV;
bounded_vector<Vertex> mPrimaryVertices;

// State if memory will be externally managed.
ExternalAllocator* mAllocator = nullptr;
bool getExtAllocator() const noexcept { return mAllocator != nullptr; }

std::array<bounded_vector<Cluster>, nLayers> mUnsortedClusters;
std::vector<bounded_vector<Tracklet>> mTracklets;
std::vector<bounded_vector<CellSeedN>> mCells;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class Tracker
const LogFunc& = [](const std::string& s) { std::cerr << s << '\n'; });

void setParameters(const std::vector<TrackingParameters>& p) { mTrkParams = p; }
void setMemoryPool(std::shared_ptr<BoundedMemoryResource>& pool) { mMemoryPool = pool; }
void setMemoryPool(std::shared_ptr<BoundedMemoryResource> pool) { mMemoryPool = pool; }
std::vector<TrackingParameters>& getParameters() { return mTrkParams; }
void setBz(float bz) { mTraits->setBz(bz); }
bool isMatLUT() const { return mTraits->isMatLUT(); }
Expand Down
Loading