Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,23 @@ AllocatorPtr CUDAExecutionProvider::CreateCudaAllocator(const CUDAAllocatorParam
}
}

AllocatorPtr CUDAExecutionProvider::CreateCudaPinnedAllocator(const CUDAAllocatorParams& cuda_allocator_params) {
  // Builds an allocator for CUDA pinned (host-accessible) memory. If the caller
  // supplied an arena config we use it verbatim; otherwise we synthesize one from
  // the individual fields of the params struct (threshold + extend strategy, with
  // the remaining knobs left at their -1 "default" sentinels).
  const OrtArenaCfg* supplied_cfg = cuda_allocator_params.arena_cfg;
  const OrtArenaCfg effective_cfg =
      (supplied_cfg != nullptr)
          ? *supplied_cfg
          : OrtArenaCfg(cuda_allocator_params.cuda_mem_threshold,
                        static_cast<int>(cuda_allocator_params.arena_extend_strategy), -1, -1, -1, -1L);

  AllocatorCreationInfo pinned_memory_info(
      [](OrtDevice::DeviceId id) { return std::make_unique<CUDAPinnedAllocator>(id, CUDA_PINNED); },
      cuda_allocator_params.device_id,
      true,  // arena-based allocation enabled
      {effective_cfg},
      // stream-aware flag (intentionally set to false for this allocator)
      false);

  return CreateAllocator(pinned_memory_info);
}

CUDAExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId device_id, cudaStream_t stream, size_t /*gpu_mem_limit*/,
ArenaExtendStrategy /*arena_extend_strategy*/, CUDAExecutionProviderExternalAllocatorInfo /*external_allocator_info*/,
OrtArenaCfg* /*default_memory_arena_cfg*/) {
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ class CUDAExecutionProvider : public IExecutionProvider {

static AllocatorPtr CreateCudaAllocator(const CUDAAllocatorParams& cuda_allocator_params);

static AllocatorPtr CreateCudaPinnedAllocator(const CUDAAllocatorParams& cuda_allocator_params);

ITuningContext* GetTuningContext() const override;

std::unique_ptr<profiling::EpProfiler> GetProfiler() override;
Expand Down
9 changes: 9 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ struct ProviderInfo_CUDA_Impl final : ProviderInfo_CUDA {
params.arena_cfg = default_memory_arena_cfg;
return CUDAExecutionProvider::CreateCudaAllocator(params);
}

std::shared_ptr<IAllocator> CreateCudaPinnedAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, const OrtArenaCfg* default_memory_arena_cfg) override {
CUDAExecutionProvider::CUDAAllocatorParams params{};
params.device_id = device_id;
params.cuda_mem_threshold = gpu_mem_limit;
params.arena_extend_strategy = arena_extend_strategy;
params.arena_cfg = default_memory_arena_cfg;
return CUDAExecutionProvider::CreateCudaPinnedAllocator(params);
}
} g_info;

struct CUDA_Provider : Provider {
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/cuda/cuda_provider_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@

virtual std::shared_ptr<onnxruntime::IExecutionProviderFactory> CreateExecutionProviderFactory(const onnxruntime::CUDAExecutionProviderInfo& info) = 0;
virtual std::shared_ptr<onnxruntime::IAllocator> CreateCudaAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::CUDAExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0;
virtual std::shared_ptr<onnxruntime::IAllocator> CreateCudaPinnedAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, const OrtArenaCfg* default_memory_arena_cfg) = 0;

Check warning on line 56 in onnxruntime/core/providers/cuda/cuda_provider_factory.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <memory> for shared_ptr<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/cuda/cuda_provider_factory.h:56: Add #include <memory> for shared_ptr<> [build/include_what_you_use] [4]

// This function is the entry point to CUDA EP's UT cases.
// All tests are only called from onnxruntime_provider_test.
Expand Down
6 changes: 4 additions & 2 deletions onnxruntime/core/session/environment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -403,9 +403,11 @@ Status Environment::CreateAndRegisterAllocatorV2(const std::string& provider_typ
#if defined(USE_CUDA) || defined(USE_CUDA_PROVIDER_INTERFACE)
if (provider_type == onnxruntime::kCudaExecutionProvider) {
if (mem_info.device.MemType() == OrtDevice::MemType::HOST_ACCESSIBLE) {
AllocatorPtr allocator_ptr = GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(
AllocatorPtr allocator_ptr = GetProviderInfo_CUDA().CreateCudaPinnedAllocator(
static_cast<int16_t>(mem_info.device.Id()),
onnxruntime::CUDA_PINNED);
arena_cfg->max_mem,
static_cast<ArenaExtendStrategy>(arena_cfg->arena_extend_strategy),
arena_cfg);
return RegisterAllocatorImpl(allocator_ptr);
} else {
CUDAExecutionProviderInfo cuda_ep_info;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ struct ProviderInfo_CUDA_TestImpl : ProviderInfo_CUDA {
return nullptr;
}

// Test stub: the UT provider never allocates pinned memory, so every
// argument is ignored and no allocator is produced.
std::shared_ptr<onnxruntime::IAllocator> CreateCudaPinnedAllocator(int16_t /*device_id*/, size_t /*gpu_mem_limit*/,
                                                                   onnxruntime::ArenaExtendStrategy /*arena_extend_strategy*/,
                                                                   const OrtArenaCfg* /*default_memory_arena_cfg*/) override {
  return {};
}

void TestAll() override {
// TestAll is the entry point of CUDA EP's internal tests.
// Those internal tests are not directly callable from onnxruntime_provider_test
Expand Down
Loading