Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,23 @@ AllocatorPtr CUDAExecutionProvider::CreateCudaAllocator(const CUDAAllocatorParam
}
}

AllocatorPtr CUDAExecutionProvider::CreateCudaPinnedAllocator(const CUDAAllocatorParams& cuda_allocator_params) {
  // Builds an allocator for CUDA pinned (host-accessible) memory. If the caller
  // supplied an arena config we use it verbatim; otherwise we synthesize one from
  // the individual fields of the params struct (threshold + extend strategy, with
  // the remaining knobs left at their -1 "default" sentinels).
  const OrtArenaCfg* supplied_cfg = cuda_allocator_params.arena_cfg;
  const OrtArenaCfg effective_cfg =
      (supplied_cfg != nullptr)
          ? *supplied_cfg
          : OrtArenaCfg(cuda_allocator_params.cuda_mem_threshold,
                        static_cast<int>(cuda_allocator_params.arena_extend_strategy), -1, -1, -1, -1L);

  AllocatorCreationInfo pinned_memory_info(
      [](OrtDevice::DeviceId id) { return std::make_unique<CUDAPinnedAllocator>(id, CUDA_PINNED); },
      cuda_allocator_params.device_id,
      true,  // arena-based allocation enabled
      {effective_cfg},
      // stream-aware flag (intentionally set to false for this allocator)
      false);

  return CreateAllocator(pinned_memory_info);
}

CUDAExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId device_id, cudaStream_t stream, size_t /*gpu_mem_limit*/,
ArenaExtendStrategy /*arena_extend_strategy*/, CUDAExecutionProviderExternalAllocatorInfo /*external_allocator_info*/,
OrtArenaCfg* /*default_memory_arena_cfg*/) {
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ class CUDAExecutionProvider : public IExecutionProvider {

static AllocatorPtr CreateCudaAllocator(const CUDAAllocatorParams& cuda_allocator_params);

static AllocatorPtr CreateCudaPinnedAllocator(const CUDAAllocatorParams& cuda_allocator_params);

ITuningContext* GetTuningContext() const override;

std::unique_ptr<profiling::EpProfiler> GetProfiler() override;
Expand Down
9 changes: 9 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ struct ProviderInfo_CUDA_Impl final : ProviderInfo_CUDA {
params.arena_cfg = default_memory_arena_cfg;
return CUDAExecutionProvider::CreateCudaAllocator(params);
}

std::shared_ptr<IAllocator> CreateCudaPinnedAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, const OrtArenaCfg* default_memory_arena_cfg) override {
CUDAExecutionProvider::CUDAAllocatorParams params{};
params.device_id = device_id;
params.cuda_mem_threshold = gpu_mem_limit;
params.arena_extend_strategy = arena_extend_strategy;
params.arena_cfg = default_memory_arena_cfg;
return CUDAExecutionProvider::CreateCudaPinnedAllocator(params);
}
} g_info;

struct CUDA_Provider : Provider {
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@

virtual std::shared_ptr<onnxruntime::IExecutionProviderFactory> CreateExecutionProviderFactory(const onnxruntime::CUDAExecutionProviderInfo& info) = 0;
virtual std::shared_ptr<onnxruntime::IAllocator> CreateCudaAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::CUDAExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0;
virtual std::shared_ptr<onnxruntime::IAllocator> CreateCudaPinnedAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, const OrtArenaCfg* default_memory_arena_cfg) = 0;

Check warning on line 56 in onnxruntime/core/providers/cuda/cuda_provider_factory.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <memory> for shared_ptr<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/cuda/cuda_provider_factory.h:56: Add #include <memory> for shared_ptr<> [build/include_what_you_use] [4]

// This function is the entry point to CUDA EP's UT cases.
// All tests are only called from onnxruntime_provider_test.
Expand Down
6 changes: 4 additions & 2 deletions onnxruntime/core/session/environment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -403,9 +403,11 @@ Status Environment::CreateAndRegisterAllocatorV2(const std::string& provider_typ
#if defined(USE_CUDA) || defined(USE_CUDA_PROVIDER_INTERFACE)
if (provider_type == onnxruntime::kCudaExecutionProvider) {
if (mem_info.device.MemType() == OrtDevice::MemType::HOST_ACCESSIBLE) {
AllocatorPtr allocator_ptr = GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(
AllocatorPtr allocator_ptr = GetProviderInfo_CUDA().CreateCudaPinnedAllocator(
static_cast<int16_t>(mem_info.device.Id()),
onnxruntime::CUDA_PINNED);
arena_cfg->max_mem,
static_cast<ArenaExtendStrategy>(arena_cfg->arena_extend_strategy),
arena_cfg);
return RegisterAllocatorImpl(allocator_ptr);
} else {
CUDAExecutionProviderInfo cuda_ep_info;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ struct ProviderInfo_CUDA_TestImpl : ProviderInfo_CUDA {
return nullptr;
}

// Test stub: the UT provider never allocates pinned memory, so every
// argument is ignored and no allocator is produced.
std::shared_ptr<onnxruntime::IAllocator> CreateCudaPinnedAllocator(int16_t /*device_id*/, size_t /*gpu_mem_limit*/,
                                                                   onnxruntime::ArenaExtendStrategy /*arena_extend_strategy*/,
                                                                   const OrtArenaCfg* /*default_memory_arena_cfg*/) override {
  return {};
}

void TestAll() override {
// TestAll is the entry point of CUDA EP's internal tests.
// Those internal tests are not directly callable from onnxruntime_provider_test
Expand Down
Loading