diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index eff0801a00460..eab616388d6ae 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -203,6 +203,23 @@ AllocatorPtr CUDAExecutionProvider::CreateCudaAllocator(const CUDAAllocatorParam } } +AllocatorPtr CUDAExecutionProvider::CreateCudaPinnedAllocator(const CUDAAllocatorParams& cuda_allocator_params) { + const auto* arena_cfg = cuda_allocator_params.arena_cfg; + AllocatorCreationInfo pinned_memory_info( + [](OrtDevice::DeviceId id) { + return std::make_unique<CUDAPinnedAllocator>(id, CUDA_PINNED); + }, + cuda_allocator_params.device_id, + true, + {arena_cfg ? *arena_cfg + : OrtArenaCfg(cuda_allocator_params.cuda_mem_threshold, + static_cast<int>(cuda_allocator_params.arena_extend_strategy), -1, -1, -1, -1L)}, + // stream-aware flag (intentionally set to false for this allocator) + false); + + return CreateAllocator(pinned_memory_info); +} + CUDAExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId device_id, cudaStream_t stream, size_t /*gpu_mem_limit*/, ArenaExtendStrategy /*arena_extend_strategy*/, CUDAExecutionProviderExternalAllocatorInfo /*external_allocator_info*/, OrtArenaCfg* /*default_memory_arena_cfg*/) { diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.h b/onnxruntime/core/providers/cuda/cuda_execution_provider.h index 751bbb90f8619..191b30f9d8cda 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.h +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.h @@ -115,6 +115,8 @@ class CUDAExecutionProvider : public IExecutionProvider { static AllocatorPtr CreateCudaAllocator(const CUDAAllocatorParams& cuda_allocator_params); + static AllocatorPtr CreateCudaPinnedAllocator(const CUDAAllocatorParams& cuda_allocator_params); + ITuningContext* GetTuningContext() const override; std::unique_ptr<profiling::EpProfiler>
GetProfiler() override; diff --git a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc index 70afba320576b..dbed9953da878 100644 --- a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc +++ b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc @@ -189,6 +189,15 @@ struct ProviderInfo_CUDA_Impl final : ProviderInfo_CUDA { params.arena_cfg = default_memory_arena_cfg; return CUDAExecutionProvider::CreateCudaAllocator(params); } + + std::shared_ptr<IAllocator> CreateCudaPinnedAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, const OrtArenaCfg* default_memory_arena_cfg) override { + CUDAExecutionProvider::CUDAAllocatorParams params{}; + params.device_id = device_id; + params.cuda_mem_threshold = gpu_mem_limit; + params.arena_extend_strategy = arena_extend_strategy; + params.arena_cfg = default_memory_arena_cfg; + return CUDAExecutionProvider::CreateCudaPinnedAllocator(params); + } } g_info; struct CUDA_Provider : Provider { diff --git a/onnxruntime/core/providers/cuda/cuda_provider_factory.h b/onnxruntime/core/providers/cuda/cuda_provider_factory.h index e83ef6f9b329f..1a4b19cb100d3 100644 --- a/onnxruntime/core/providers/cuda/cuda_provider_factory.h +++ b/onnxruntime/core/providers/cuda/cuda_provider_factory.h @@ -53,6 +53,7 @@ struct ProviderInfo_CUDA { virtual std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const onnxruntime::CUDAExecutionProviderInfo& info) = 0; virtual std::shared_ptr<IAllocator> CreateCudaAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::CUDAExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0; + virtual std::shared_ptr<IAllocator> CreateCudaPinnedAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, const OrtArenaCfg* default_memory_arena_cfg) = 0; // This function is 
the entry point to CUDA EP's UT cases. // All tests are only called from onnxruntime_provider_test. diff --git a/onnxruntime/core/session/environment.cc b/onnxruntime/core/session/environment.cc index cde77eeed8aa5..9008a906155fd 100644 --- a/onnxruntime/core/session/environment.cc +++ b/onnxruntime/core/session/environment.cc @@ -403,9 +403,11 @@ Status Environment::CreateAndRegisterAllocatorV2(const std::string& provider_typ #if defined(USE_CUDA) || defined(USE_CUDA_PROVIDER_INTERFACE) if (provider_type == onnxruntime::kCudaExecutionProvider) { if (mem_info.device.MemType() == OrtDevice::MemType::HOST_ACCESSIBLE) { - AllocatorPtr allocator_ptr = GetProviderInfo_CUDA().CreateCUDAPinnedAllocator( + AllocatorPtr allocator_ptr = GetProviderInfo_CUDA().CreateCudaPinnedAllocator( static_cast<int16_t>(mem_info.device.Id()), - onnxruntime::CUDA_PINNED); + arena_cfg->max_mem, + static_cast<onnxruntime::ArenaExtendStrategy>(arena_cfg->arena_extend_strategy), + arena_cfg); return RegisterAllocatorImpl(allocator_ptr); } else { CUDAExecutionProviderInfo cuda_ep_info; diff --git a/onnxruntime/test/providers/cuda/test_cases/cuda_test_provider.cc b/onnxruntime/test/providers/cuda/test_cases/cuda_test_provider.cc index 735bd89aff260..01c7573b9de14 100644 --- a/onnxruntime/test/providers/cuda/test_cases/cuda_test_provider.cc +++ b/onnxruntime/test/providers/cuda/test_cases/cuda_test_provider.cc @@ -105,6 +105,11 @@ struct ProviderInfo_CUDA_TestImpl : ProviderInfo_CUDA { return nullptr; } + std::shared_ptr<IAllocator> CreateCudaPinnedAllocator(int16_t, size_t, onnxruntime::ArenaExtendStrategy, + const OrtArenaCfg*) override { + return nullptr; + } + void TestAll() override { // TestAll is the entry point of CUDA EP's internal tests. // Those internal tests are not directly callable from onnxruntime_provider_test