Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/onnxruntime/core/framework/run_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ struct OrtRunOptions {

onnxruntime::InlinedVector<const onnxruntime::lora::LoraAdapter*> active_adapters;

// Optional sync stream for external resource import.
// When set, the EP uses this stream for execution, enabling proper
// synchronization with imported external semaphores.
// Defaults to nullptr (no stream associated). Held as a raw pointer: the
// defaulted destructor of this struct does not release the stream.
OrtSyncStream* sync_stream = nullptr;

OrtRunOptions() = default;
~OrtRunOptions() = default;
};
Expand Down
301 changes: 301 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,9 @@ ORT_RUNTIME_CLASS(EpDevice);
ORT_RUNTIME_CLASS(KeyValuePairs);
ORT_RUNTIME_CLASS(SyncStream); // Opaque class to create an onnxruntime::Stream.
ORT_RUNTIME_CLASS(ExternalInitializerInfo);
ORT_RUNTIME_CLASS(ExternalResourceImporter); // Capability object for external resource import
ORT_RUNTIME_CLASS(ExternalMemoryHandle); // EP-imported view of shared external allocation
ORT_RUNTIME_CLASS(ExternalSemaphoreHandle); // EP-imported view of shared external semaphore

#ifdef _MSC_VER
typedef _Return_type_success_(return == 0) OrtStatus* OrtStatusPtr;
Expand Down Expand Up @@ -955,6 +958,87 @@ typedef void (*RunAsyncCallbackFn)(void* user_data, OrtValue** outputs, size_t n
*
* \nosubgrouping
*/

/** \addtogroup Global
* @{
*/

/** \brief External memory handle type for importing GPU resources.
*
* Stored in OrtExternalMemoryDescriptor::handle_type to describe the kind of handle
* carried in OrtExternalMemoryDescriptor::native_handle. Support for a given type can be
* queried with OrtApi::ExternalResourceImporter_CanImportMemory.
*
* \since Version 1.24.
*/
typedef enum OrtExternalMemoryHandleType {
ORT_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 0, /**< Shared HANDLE from ID3D12Device::CreateSharedHandle(resource) */
ORT_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 1, /**< Shared HANDLE from ID3D12Device::CreateSharedHandle(heap) */
} OrtExternalMemoryHandleType;

/** \brief Access mode for imported external memory.
*
* Stored in OrtExternalMemoryDescriptor::access_mode. Because
* ORT_EXTERNAL_MEMORY_ACCESS_READ_WRITE has the value 0, a zero-initialized
* descriptor requests read/write access.
*
* \since Version 1.24.
*/
typedef enum OrtExternalMemoryAccessMode {
ORT_EXTERNAL_MEMORY_ACCESS_READ_WRITE = 0, /**< Memory can be read and written */
ORT_EXTERNAL_MEMORY_ACCESS_READ_ONLY = 1, /**< Memory is read-only */
ORT_EXTERNAL_MEMORY_ACCESS_WRITE_ONLY = 2, /**< Memory is write-only */
} OrtExternalMemoryAccessMode;

/** \brief Current layout version of OrtExternalMemoryDescriptor.
 *
 * Callers assign this value to OrtExternalMemoryDescriptor::version.
 *
 * \since Version 1.24.
 */
#define ORT_EXTERNAL_MEMORY_DESCRIPTOR_VERSION 1

/** \brief Descriptor for importing external memory.
 *
 * Passed to OrtApi::ExternalResourceImporter_ImportMemory.
 *
 * \note The version field must be set to ORT_EXTERNAL_MEMORY_DESCRIPTOR_VERSION.
 * This ensures forward compatibility as fields may be added in future versions.
 *
 * \since Version 1.24.
 */
typedef struct OrtExternalMemoryDescriptor {
  uint32_t version;                         /**< Must be ORT_EXTERNAL_MEMORY_DESCRIPTOR_VERSION */
  OrtExternalMemoryHandleType handle_type;  /**< Type of the external memory handle */
  void* native_handle;                      /**< Platform-specific handle (e.g., Windows HANDLE) */
  size_t size_bytes;                        /**< Total size in bytes of the external allocation */
  size_t offset_bytes;                      /**< Offset in bytes into the allocation (use 0 for no offset) */
  OrtExternalMemoryAccessMode access_mode;  /**< Access mode for the imported memory */
} OrtExternalMemoryDescriptor;

/** \brief External semaphore type for GPU synchronization.
*
* Stored in OrtExternalSemaphoreDescriptor::type to describe the kind of handle
* carried in OrtExternalSemaphoreDescriptor::native_handle. Support for a given type can be
* queried with OrtApi::ExternalResourceImporter_CanImportSemaphore.
*
* \since Version 1.24.
*/
typedef enum OrtExternalSemaphoreType {
ORT_EXTERNAL_SEMAPHORE_D3D12_FENCE = 0, /**< Shared HANDLE from ID3D12Device::CreateSharedHandle(fence) */
} OrtExternalSemaphoreType;

/** \brief Current layout version of OrtExternalSemaphoreDescriptor.
 *
 * Callers assign this value to OrtExternalSemaphoreDescriptor::version.
 *
 * \since Version 1.24.
 */
#define ORT_EXTERNAL_SEMAPHORE_DESCRIPTOR_VERSION 1

/** \brief Descriptor for importing external semaphores.
 *
 * Passed to OrtApi::ExternalResourceImporter_ImportSemaphore.
 *
 * \note The version field must be set to ORT_EXTERNAL_SEMAPHORE_DESCRIPTOR_VERSION.
 * This ensures forward compatibility as fields may be added in future versions.
 *
 * \since Version 1.24.
 */
typedef struct OrtExternalSemaphoreDescriptor {
  uint32_t version;               /**< Must be ORT_EXTERNAL_SEMAPHORE_DESCRIPTOR_VERSION */
  OrtExternalSemaphoreType type;  /**< Type of the external semaphore */
  void* native_handle;            /**< Platform-specific handle (e.g., Windows HANDLE) */
} OrtExternalSemaphoreDescriptor;

/** \brief Current layout version of OrtExternalTensorDescriptor.
 *
 * Callers assign this value to OrtExternalTensorDescriptor::version.
 *
 * \since Version 1.24.
 */
#define ORT_EXTERNAL_TENSOR_DESCRIPTOR_VERSION 1

/** \brief Descriptor for creating a tensor from imported external memory.
 *
 * Passed to OrtApi::ExternalResourceImporter_CreateTensorFromMemory.
 *
 * \note The version field must be set to ORT_EXTERNAL_TENSOR_DESCRIPTOR_VERSION.
 * This ensures forward compatibility as fields may be added in future versions.
 *
 * \since Version 1.24.
 */
typedef struct OrtExternalTensorDescriptor {
  uint32_t version;                        /**< Must be ORT_EXTERNAL_TENSOR_DESCRIPTOR_VERSION */
  ONNXTensorElementDataType element_type;  /**< Data type of tensor elements */
  const int64_t* shape;                    /**< Array of `rank` dimension sizes */
  size_t rank;                             /**< Number of dimensions, i.e. number of entries in `shape` */
  size_t offset_bytes;                     /**< Optional offset within imported memory (use 0 for no offset) */
} OrtExternalTensorDescriptor;

/// @}

/*
* Public enum for compiled model compatibility across EPs.
*/
Expand Down Expand Up @@ -6608,6 +6692,223 @@ struct OrtApi {
* \since Version 1.24
*/
ORT_API2_STATUS(KernelInfo_GetConfigEntries, _In_ const OrtKernelInfo* info, _Outptr_ OrtKeyValuePairs** out);

/// \name External Resource Import
/// @{

/** \brief Create an external resource importer for a specific EP device.
*
* The external resource importer is a capability object that provides methods for importing
* external GPU memory and semaphores for zero-copy import with an execution provider.
*
* \param[in] ep_device The OrtEpDevice instance to create the importer for.
* \param[out] out_importer Output parameter set to the created OrtExternalResourceImporter instance,
* or to nullptr if the EP does not support external resource import.
* Callers should check for nullptr before using the importer.
* A non-null importer is expected to be released with
* OrtApi::ReleaseExternalResourceImporter.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(CreateExternalResourceImporterForDevice,
_In_ const OrtEpDevice* ep_device,
_Outptr_result_maybenull_ OrtExternalResourceImporter** out_importer);

/** \brief Release an OrtExternalResourceImporter instance.
*
* Expected to be exposed as OrtApi::ReleaseExternalResourceImporter, following the
* ORT_CLASS_RELEASE naming convention (the macro is defined outside this section).
*
* \param[in] importer The OrtExternalResourceImporter instance to release. May be nullptr.
*
* \since Version 1.24.
*/
ORT_CLASS_RELEASE(ExternalResourceImporter);

/** \brief Check if the external resource importer can import a specific memory handle type.
*
* Intended as a capability probe before calling ExternalResourceImporter_ImportMemory
* with a descriptor of the same handle type.
*
* \param[in] importer The OrtExternalResourceImporter instance.
* \param[in] handle_type The type of external memory handle to check.
* \param[out] out_supported Set to true if the handle type is supported.
* NOTE(review): presumably set to false otherwise - confirm against the implementation.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ExternalResourceImporter_CanImportMemory,
_In_ const OrtExternalResourceImporter* importer,
_In_ OrtExternalMemoryHandleType handle_type,
_Out_ bool* out_supported);

/** \brief Import external memory into the execution provider.
*
* \param[in] importer The OrtExternalResourceImporter instance.
* \param[in] desc Descriptor containing the external memory handle and properties.
* desc->version must be ORT_EXTERNAL_MEMORY_DESCRIPTOR_VERSION.
* \param[out] out_handle Output parameter set to the created OrtExternalMemoryHandle.
* Expected to be released with OrtApi::ReleaseExternalMemoryHandle.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ExternalResourceImporter_ImportMemory,
_In_ OrtExternalResourceImporter* importer,
_In_ const OrtExternalMemoryDescriptor* desc,
_Outptr_ OrtExternalMemoryHandle** out_handle);

/** \brief Release an OrtExternalMemoryHandle instance.
*
* Expected to be exposed as OrtApi::ReleaseExternalMemoryHandle, following the
* ORT_CLASS_RELEASE naming convention (the macro is defined outside this section).
* Tensors created with ExternalResourceImporter_CreateTensorFromMemory require the handle
* to stay valid for their lifetime, so release the handle only after those tensors.
*
* \param[in] handle The OrtExternalMemoryHandle instance to release. May be nullptr.
*
* \since Version 1.24.
*/
ORT_CLASS_RELEASE(ExternalMemoryHandle);

/** \brief Create a tensor backed by imported external memory.
*
* The created tensor is a view over the imported memory and does not copy data.
* The OrtExternalMemoryHandle must remain valid for the lifetime of the tensor.
*
* \param[in] importer The OrtExternalResourceImporter instance.
* \param[in] mem_handle The imported external memory handle.
* \param[in] tensor_desc Descriptor specifying tensor element type, shape, and optional offset.
* tensor_desc->version must be ORT_EXTERNAL_TENSOR_DESCRIPTOR_VERSION.
* \param[in] tensor_location Optional OrtMemoryInfo for the tensor location. May be nullptr.
* NOTE(review): the location used when nullptr is passed is not stated here - confirm.
* \param[out] out_tensor Output parameter set to the created OrtValue containing the tensor.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ExternalResourceImporter_CreateTensorFromMemory,
_In_ OrtExternalResourceImporter* importer,
_In_ const OrtExternalMemoryHandle* mem_handle,
_In_ const OrtExternalTensorDescriptor* tensor_desc,
_In_opt_ const OrtMemoryInfo* tensor_location,
_Outptr_ OrtValue** out_tensor);

/** \brief Check if the external resource importer can import a specific semaphore type.
*
* Intended as a capability probe before calling ExternalResourceImporter_ImportSemaphore
* with a descriptor of the same semaphore type.
*
* \param[in] importer The OrtExternalResourceImporter instance.
* \param[in] type The type of external semaphore to check.
* \param[out] out_supported Set to true if the semaphore type is supported.
* NOTE(review): presumably set to false otherwise - confirm against the implementation.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ExternalResourceImporter_CanImportSemaphore,
_In_ const OrtExternalResourceImporter* importer,
_In_ OrtExternalSemaphoreType type,
_Out_ bool* out_supported);

/** \brief Import an external semaphore into the execution provider.
*
* \param[in] importer The OrtExternalResourceImporter instance.
* \param[in] desc Descriptor containing the external semaphore handle and type.
* desc->version must be ORT_EXTERNAL_SEMAPHORE_DESCRIPTOR_VERSION.
* \param[out] out_handle Output parameter set to the created OrtExternalSemaphoreHandle.
* Expected to be released with OrtApi::ReleaseExternalSemaphoreHandle.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ExternalResourceImporter_ImportSemaphore,
_In_ OrtExternalResourceImporter* importer,
_In_ const OrtExternalSemaphoreDescriptor* desc,
_Outptr_ OrtExternalSemaphoreHandle** out_handle);

/** \brief Release an OrtExternalSemaphoreHandle instance.
*
* Expected to be exposed as OrtApi::ReleaseExternalSemaphoreHandle, following the
* ORT_CLASS_RELEASE naming convention (the macro is defined outside this section).
*
* \param[in] handle The OrtExternalSemaphoreHandle instance to release. May be nullptr.
*
* \since Version 1.24.
*/
ORT_CLASS_RELEASE(ExternalSemaphoreHandle);

/** \brief Wait on an external semaphore on the EP's stream.
*
* Inserts a wait operation into the EP's stream that blocks until the semaphore
* reaches the specified value. This is used to synchronize with external GPU work
* (e.g., D3D12 timeline fence).
*
* \param[in] importer The OrtExternalResourceImporter instance.
* \param[in] semaphore_handle The imported external semaphore to wait on.
* \param[in] stream The OrtSyncStream into which the wait operation is inserted.
* \param[in] value The fence/semaphore value to wait for.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ExternalResourceImporter_WaitSemaphore,
_In_ OrtExternalResourceImporter* importer,
_In_ OrtExternalSemaphoreHandle* semaphore_handle,
_In_ OrtSyncStream* stream,
_In_ uint64_t value);

/** \brief Signal an external semaphore from the EP's stream.
*
* Inserts a signal operation into the EP's stream that sets the semaphore
* to the specified value when reached. This is used to notify external GPU work
* (e.g., D3D12 timeline fence) that ORT inference is complete.
*
* \param[in] importer The OrtExternalResourceImporter instance.
* \param[in] semaphore_handle The imported external semaphore to signal.
* \param[in] stream The OrtSyncStream into which the signal operation is inserted.
* \param[in] value The fence/semaphore value to signal.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(ExternalResourceImporter_SignalSemaphore,
_In_ OrtExternalResourceImporter* importer,
_In_ OrtExternalSemaphoreHandle* semaphore_handle,
_In_ OrtSyncStream* stream,
_In_ uint64_t value);

/** \brief Get the EP device assigned to each session output.
*
* Returns the OrtEpDevice assigned to each output of the session after graph partitioning.
*
* The EP device for each output is determined by which execution provider claims that output
* during graph partitioning. This information is useful for:
* - Validating that outputs will be placed on the expected device for external resource sharing
* - Deciding whether to use external memory handles for outputs
*
* \param[in] session The OrtSession instance to query.
* \param[out] outputs_ep_devices An array to be filled with the EP device for each output.
* The array must be allocated by the caller with space for
* num_outputs OrtEpDevice* values.
* The order is the same as returned by SessionGetOutputName.
* \param[in] num_outputs The number of outputs in the session. Must match SessionGetOutputCount.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(SessionGetEpDeviceForOutputs, _In_ const OrtSession* session,
_Out_writes_(num_outputs) const OrtEpDevice** outputs_ep_devices,
_In_ size_t num_outputs);

/** \brief Associate an OrtSyncStream with run options.
*
* Associates an OrtSyncStream with OrtRunOptions for use with Run() or RunWithBinding().
* When a sync stream is set, the EP uses this stream for execution, enabling proper
* synchronization with imported external semaphores.
*
* This approach:
* - Works with both Run() and RunWithBinding() - no IOBinding requirement
* - Allows different Run calls to use different streams for concurrent inference
* - Integrates cleanly with the external semaphore wait/signal pattern
*
* NOTE(review): OrtRunOptions appears to keep only the raw stream pointer, so the caller
* presumably must keep the stream alive while the run options reference it - confirm.
*
* \param[in] run_options The OrtRunOptions instance to modify.
* \param[in] stream The OrtSyncStream to associate with the run options. May be nullptr to clear.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.24.
*/
ORT_API2_STATUS(RunOptions_SetSyncStream,
_Inout_ OrtRunOptions* run_options,
_In_opt_ OrtSyncStream* stream);

/// @}
};

/*
Expand Down
11 changes: 11 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,16 @@ struct RunOptions : detail::Base<OrtRunOptions> {
* \param adapter The LoraAdapter to be used as the active adapter
*/
RunOptions& AddActiveLoraAdapter(const LoraAdapter& adapter);

/** \brief Associate a sync stream with the run options.
*
* When set, the EP uses this stream for execution, enabling proper
* synchronization with imported external semaphores.
*
* Wraps OrtApi::RunOptions_SetSyncStream
* \param stream The OrtSyncStream to associate with these run options. May be nullptr to clear.
* \return Reference to this RunOptions, so calls can be chained.
*/
RunOptions& SetSyncStream(OrtSyncStream* stream);
};

namespace detail {
Expand Down Expand Up @@ -1607,6 +1617,7 @@ struct ConstSessionImpl : Base<T> {
std::vector<ConstMemoryInfo> GetMemoryInfoForInputs() const; ///< Wrapper for OrtApi::SessionGetMemoryInfoForInputs
std::vector<ConstMemoryInfo> GetMemoryInfoForOutputs() const; ///< Wrapper for OrtApi::SessionGetMemoryInfoForOutputs
std::vector<ConstEpDevice> GetEpDeviceForInputs() const; ///< Wrapper for OrtApi::SessionGetEpDeviceForInputs
std::vector<ConstEpDevice> GetEpDeviceForOutputs() const; ///< Wrapper for OrtApi::SessionGetEpDeviceForOutputs; one entry per session output

/** \brief Returns a copy of input name at the specified index.
*
Expand Down
18 changes: 18 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,11 @@ inline RunOptions& RunOptions::AddActiveLoraAdapter(const LoraAdapter& adapter)
return *this;
}

// Associates `stream` with these run options by forwarding to
// OrtApi::RunOptions_SetSyncStream; the returned status is handed to ThrowOnError.
// Returns *this so that setter calls can be chained.
inline RunOptions& RunOptions::SetSyncStream(OrtSyncStream* stream) {
  auto status = GetApi().RunOptions_SetSyncStream(this->p_, stream);
  ThrowOnError(status);
  return *this;
}

inline ModelCompilationOptions::ModelCompilationOptions(const Env& env, const SessionOptions& session_options) {
ThrowOnError(GetCompileApi().CreateModelCompilationOptionsFromSessionOptions(env, session_options, &this->p_));
}
Expand Down Expand Up @@ -1660,6 +1665,19 @@ inline std::vector<ConstEpDevice> ConstSessionImpl<T>::GetEpDeviceForInputs() co
return input_devices;
}

// Collects the EP device for every session output via
// OrtApi::SessionGetEpDeviceForOutputs. The C API is skipped entirely when the
// session reports zero outputs, in which case an empty vector is returned.
template <typename T>
inline std::vector<ConstEpDevice> ConstSessionImpl<T>::GetEpDeviceForOutputs() const {
  const auto output_count = GetOutputCount();
  if (output_count == 0) {
    return {};
  }

  std::vector<ConstEpDevice> devices(output_count);
  // The vector storage is handed to the C API as an array of const OrtEpDevice*;
  // this assumes ConstEpDevice wraps exactly one such pointer - TODO confirm.
  auto** raw_devices = reinterpret_cast<const OrtEpDevice**>(devices.data());
  ThrowOnError(GetApi().SessionGetEpDeviceForOutputs(this->p_, raw_devices, output_count));
  return devices;
}

template <typename T>
inline uint64_t ConstSessionImpl<T>::GetProfilingStartTimeNs() const {
uint64_t out;
Expand Down
Loading
Loading