Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmake/onnxruntime_providers_webgpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,12 @@
if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
message(FATAL_ERROR "WebGPU EP shared library build is not supported on Emscripten. Please use static library build.")
endif()

# Configure precompiled headers for the shared library build.
# CMake force-includes the generated PCH header into the target's sources, which
# guarantees ep/_pch.h is seen before any other include and also speeds up compilation.
target_precompile_headers(onnxruntime_providers_webgpu PRIVATE
"${REPO_ROOT}/include/onnxruntime/ep/_pch.h"
)
endif()

set_target_properties(onnxruntime_providers_webgpu PROPERTIES CXX_STANDARD_REQUIRED ON)
Expand Down
141 changes: 64 additions & 77 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,61 @@ endif()

partition_provider_test_srcs(all_tests onnxruntime_provider_test_srcs onnxruntime_test_all_srcs)

# Shared settings for onnxruntime test targets.
#
# Applies the warning-suppression options that the unit test executables have in common.
#
# Arguments:
#   target - name of an existing test target to configure.
#
# Example:
#   onnxruntime_apply_common_test_target_settings(onnxruntime_test_all)
function(onnxruntime_apply_common_test_target_settings target)
  if (UNIX AND (onnxruntime_USE_TENSORRT OR onnxruntime_USE_NV))
    # The test_main.cc includes NvInfer.h where it has many deprecated declarations
    # simply ignore them for TensorRT EP build.
    # APPEND_STRING concatenates without inserting a separator, so the flag carries its own
    # leading space to remain a separate token when COMPILE_FLAGS is already non-empty.
    set_property(TARGET ${target} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-deprecated-declarations")
  endif()

  if (MSVC)
    # TODO: The test code for OpenVINO, QNN, and WebGPU is getting flagged with a warning from ABSL for unreachable code.
    # Need to figure out how those particular targets/build variants are failing, but regular windows is not.
    target_compile_options(${target} PRIVATE "/wd4702")
  endif()

  # TODO fix shorten-64-to-32 warnings
  # there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline'
  if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
    target_compile_options(${target} PRIVATE -Wno-error=shorten-64-to-32)
  endif()
endfunction()

# Set environment variables for plugin EP tests when run via CTest.
#
# Arguments:
#   target - name of the CTest test whose ENVIRONMENT property is set
#            (callers in this file pass the test target name).
#
# Example:
#   onnxruntime_set_plugin_ep_test_environment(onnxruntime_provider_test)
function(onnxruntime_set_plugin_ep_test_environment target)
  if(onnxruntime_USE_WEBGPU AND NOT onnxruntime_BUILD_WEBGPU_EP_STATIC_LIB)
    # Let CMake supply the platform-specific file name of the EP library (prefix and suffix
    # differ between Windows, Linux and macOS) instead of hard-coding the Windows ".dll" name.
    # NOTE(review): generator expressions in test properties require the test to have been
    # created with the add_test(NAME ...) signature - confirm AddTest() does so.
    set(ORT_PLUGIN_EP_JSON_CONFIG "{\"ep_library_registration_name\": \"WebGPU_PluginEP\", \"ep_library_path\": \"$<TARGET_FILE_NAME:onnxruntime_providers_webgpu>\", \"selected_ep_name\": \"WebGpuExecutionProvider\"}")
    set_tests_properties(${target} PROPERTIES
      ENVIRONMENT "ORT_UNIT_TEST_MAIN_DYNAMIC_PLUGIN_EP_CONFIG_JSON=${ORT_PLUGIN_EP_JSON_CONFIG}"
    )
  # TODO: add for other plugin EPs if needed
  endif()
endfunction()

# Apply the Emscripten link settings shared by the onnxruntime test executables.
# Does nothing unless CMAKE_SYSTEM_NAME is "Emscripten".
#
# Arguments:
#   target - name of an existing test executable target.
#
# Example:
#   onnxruntime_apply_emscripten_test_link_settings(onnxruntime_test_all)
function(onnxruntime_apply_emscripten_test_link_settings target)
  if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
    # LINK_DEPENDS is a list property. Append each script so that all of them trigger a relink;
    # setting the property repeatedly would keep only the last file.
    set_property(TARGET ${target} APPEND PROPERTY LINK_DEPENDS "${TEST_SRC_DIR}/wasm/onnxruntime_test_adapter.js")
    set_property(TARGET ${target} APPEND PROPERTY LINK_DEPENDS "${ONNXRUNTIME_ROOT}/wasm/pre.js")
    set_target_properties(${target} PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s INITIAL_MEMORY=536870912 -s ALLOW_MEMORY_GROWTH=1 -s MAXIMUM_MEMORY=4294967296 -s INCOMING_MODULE_JS_API=[preRun,locateFile,arguments,onExit,wasmMemory,buffer,instantiateWasm] --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_adapter.js\" --pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1")
    if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1")
    endif()
    if (onnxruntime_USE_JSEP)
      set_property(TARGET ${target} APPEND PROPERTY LINK_DEPENDS "${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js")
      set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " --pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js\"")
    endif()

    ###
    ### if you want to investigate or debug a test failure in ${target}, replace the following line.
    ### those flags slow down the CI test significantly, so we don't use them by default.
    ###
    # set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=2")
    set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=0 -s SAFE_HEAP=0 -s STACK_OVERFLOW_CHECK=1")
  endif()
endfunction()

list(APPEND onnxruntime_test_all_srcs ${onnxruntime_unittest_main_src})
AddTest(
TARGET onnxruntime_test_all
Expand All @@ -1035,6 +1090,9 @@ AddTest(
)
target_include_directories(onnxruntime_test_all PRIVATE ${ONNXRUNTIME_ROOT}/core/flatbuffers/schema) # ort.fbs.h

onnxruntime_apply_common_test_target_settings(onnxruntime_test_all)
onnxruntime_set_plugin_ep_test_environment(onnxruntime_test_all)

if (MSVC)
# The warning means the types of the two integral operands of a binary operator are narrower than the type of the result.
# If we promote the two input values first, it could be more tolerant to integer overflow.
Expand All @@ -1044,10 +1102,6 @@ if (MSVC)
target_compile_options(onnxruntime_test_all PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /wd4244>"
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4244>")

# TODO: The test code for OpenVINO, QNN, and WebGPU is getting flagged with a warning from ABSL for unreachable code.
# Need to figure out how those particular targets/build variants are failing, but regular windows is not.
target_compile_options(onnxruntime_test_all PRIVATE "/wd4702")

# Avoid this compile error in graph_transform_test.cc and qdq_transformer_test.cc:
# fatal error C1128: number of sections exceeded object file format limit: compile with /bigobj
set_property(SOURCE "${TEST_SRC_DIR}/optimizer/graph_transform_test.cc"
Expand All @@ -1057,18 +1111,6 @@ else()
target_compile_options(onnxruntime_test_all PRIVATE "-Wno-parentheses")
endif()

# TODO fix shorten-64-to-32 warnings
# there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline'
if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
target_compile_options(onnxruntime_test_all PRIVATE -Wno-error=shorten-64-to-32)
endif()

if (UNIX AND (onnxruntime_USE_TENSORRT OR onnxruntime_USE_NV))
# The test_main.cc includes NvInfer.h where it has many deprecated declarations
# simply ignore them for TensorRT EP build
set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
endif()

if (MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS)
# attention_op_test.cc: Function uses '49152' bytes of stack: exceeds /analyze:stacksize '16384'..
target_compile_options(onnxruntime_test_all PRIVATE "/analyze:stacksize 131072")
Expand Down Expand Up @@ -1099,25 +1141,7 @@ endif()
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
target_link_libraries(onnxruntime_test_all PRIVATE Python::Python)
endif()
if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${TEST_SRC_DIR}/wasm/onnxruntime_test_adapter.js)
set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre.js)
set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s INITIAL_MEMORY=536870912 -s ALLOW_MEMORY_GROWTH=1 -s MAXIMUM_MEMORY=4294967296 -s INCOMING_MODULE_JS_API=[preRun,locateFile,arguments,onExit,wasmMemory,buffer,instantiateWasm] --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_adapter.js\" --pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1")
endif()
if (onnxruntime_USE_JSEP)
set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js)
set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " --pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js\"")
endif()

###
### if you want to investigate or debug a test failure in onnxruntime_test_all, replace the following line.
### those flags slow down the CI test significantly, so we don't use them by default.
###
# set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=2")
set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=0 -s SAFE_HEAP=0 -s STACK_OVERFLOW_CHECK=1")
endif()
onnxruntime_apply_emscripten_test_link_settings(onnxruntime_test_all)

if (onnxruntime_ENABLE_ATEN)
target_compile_definitions(onnxruntime_test_all PRIVATE ENABLE_ATEN)
Expand Down Expand Up @@ -1233,58 +1257,21 @@ block()
DEPENDS ${onnxruntime_provider_test_deps}
)

# Expose QNN SDK headers to unit tests via an interface target
onnxruntime_apply_common_test_target_settings(onnxruntime_provider_test)
onnxruntime_set_plugin_ep_test_environment(onnxruntime_provider_test)

# Expose QNN SDK headers to unit tests via an interface target
if(onnxruntime_USE_QNN)
add_library(qnn_sdk_headers_include INTERFACE)
target_include_directories(qnn_sdk_headers_include INTERFACE
${onnxruntime_QNN_HOME}/include
${onnxruntime_QNN_HOME}/include/QNN)
target_link_libraries(onnxruntime_provider_test PRIVATE qnn_sdk_headers_include)
endif()

if (UNIX AND (onnxruntime_USE_TENSORRT OR onnxruntime_USE_NV))
# The test_main.cc includes NvInfer.h where it has many deprecated declarations
# simply ignore them for TensorRT EP build
set_property(TARGET onnxruntime_provider_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
endif()

# enable dynamic plugin EP usage
target_compile_definitions(onnxruntime_provider_test PRIVATE ORT_UNIT_TEST_ENABLE_DYNAMIC_PLUGIN_EP_USAGE)


if (MSVC)
# TODO: The test code for OpenVINO, QNN, and WebGPU is getting flagged with a warning from ABSL for unreachable code.
# Need to figure out how those particular targets/build variants are failing, but regular windows is not.
target_compile_options(onnxruntime_provider_test PRIVATE "/wd4702")
endif()

# TODO fix shorten-64-to-32 warnings
# there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline'
if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
target_compile_options(onnxruntime_provider_test PRIVATE -Wno-error=shorten-64-to-32)
endif()

# copied from onnxruntime_test_all
# TODO reuse instead of copy?
if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set_target_properties(onnxruntime_provider_test PROPERTIES LINK_DEPENDS ${TEST_SRC_DIR}/wasm/onnxruntime_test_adapter.js)
set_target_properties(onnxruntime_provider_test PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre.js)
set_target_properties(onnxruntime_provider_test PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s INITIAL_MEMORY=536870912 -s ALLOW_MEMORY_GROWTH=1 -s MAXIMUM_MEMORY=4294967296 -s INCOMING_MODULE_JS_API=[preRun,locateFile,arguments,onExit,wasmMemory,buffer,instantiateWasm] --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_adapter.js\" --pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
set_property(TARGET onnxruntime_provider_test APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1")
endif()
if (onnxruntime_USE_JSEP)
set_target_properties(onnxruntime_provider_test PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js)
set_property(TARGET onnxruntime_provider_test APPEND_STRING PROPERTY LINK_FLAGS " --pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js\"")
endif()

###
### if you want to investigate or debug a test failure in onnxruntime_provider_test, replace the following line.
### those flags slow down the CI test significantly, so we don't use them by default.
###
# set_property(TARGET onnxruntime_provider_test APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=2")
set_property(TARGET onnxruntime_provider_test APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=0 -s SAFE_HEAP=0 -s STACK_OVERFLOW_CHECK=1")
endif()
onnxruntime_apply_emscripten_test_link_settings(onnxruntime_provider_test)

if (IOS)
add_custom_command(
Expand Down
7 changes: 7 additions & 0 deletions include/onnxruntime/ep/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
## EP adapter

This folder contains a set of C++ header files. They allow ONNX Runtime's internal kernel-based EPs to use the plugin-style EP API while keeping changes to existing code minimal.

### Usage

Make sure every source file in the EP implementation includes "ep/_pch.h" before any other header. Using it as a precompiled header (PCH) is recommended.
61 changes: 61 additions & 0 deletions include/onnxruntime/ep/_pch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "api.h"

Check warning on line 6 in include/onnxruntime/ep/_pch.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Include the directory when naming header files [build/include_subdir] [4] Raw Output: include/onnxruntime/ep/_pch.h:6: Include the directory when naming header files [build/include_subdir] [4]
#include "common.h"

Check warning on line 7 in include/onnxruntime/ep/_pch.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Include the directory when naming header files [build/include_subdir] [4] Raw Output: include/onnxruntime/ep/_pch.h:7: Include the directory when naming header files [build/include_subdir] [4]

// This header is only used when building WebGPU/CUDA EP as a shared library.
//
// This header file is used as a precompiled header so it is always included first.

#pragma push_macro("ORT_EP_API_ADAPTER_HEADER_INCLUDED")
#define ORT_EP_API_ADAPTER_HEADER_INCLUDED

#include "adapter/allocator.h"
#include "adapter/logging.h"
#include "adapter/ep.h"
#include "adapter/kernel_registry.h"

#pragma pop_macro("ORT_EP_API_ADAPTER_HEADER_INCLUDED")

//
// EP specific using declarations
//
// Expanding EP_SPECIFIC_USING_DECLARATIONS inside a namespace declares aliases there
// (FuncManager, KernelRegistry, OpKernel, OpKernelContext, ...) that refer to the
// corresponding types in onnxruntime::ep::adapter, plus a nested `logging` namespace
// whose `Logger` aliases the adapter's Logger.
//

#define EP_SPECIFIC_USING_DECLARATIONS \
using FuncManager = onnxruntime::ep::adapter::FuncManager; \
using KernelCreatePtrFn = onnxruntime::ep::adapter::KernelCreatePtrFn; \
using KernelDefBuilder = onnxruntime::ep::adapter::KernelDefBuilder; \
using KernelRegistry = onnxruntime::ep::adapter::KernelRegistry; \
using KernelCreateInfo = onnxruntime::ep::adapter::KernelCreateInfo; \
using BuildKernelCreateInfoFn = onnxruntime::ep::adapter::KernelCreateInfo (*)(); \
using OpKernelInfo = onnxruntime::ep::adapter::OpKernelInfo; \
using OpKernelContext = onnxruntime::ep::adapter::OpKernelContext; \
using OpKernel = onnxruntime::ep::adapter::OpKernel; \
using DataTransferManager = onnxruntime::ep::adapter::DataTransferManager; \
namespace logging { \
using Logger = onnxruntime::ep::adapter::Logger; \
}

// Declare the adapter aliases inside each EP namespace, so that an unqualified name such as
// `OpKernel` used within one of these namespaces resolves to the adapter type declared here.
namespace onnxruntime {
namespace webgpu {
EP_SPECIFIC_USING_DECLARATIONS
} // namespace webgpu
namespace cuda {
EP_SPECIFIC_USING_DECLARATIONS
} // namespace cuda

// The contrib-op namespaces get the same aliases unless contrib ops are compiled out.
#ifndef DISABLE_CONTRIB_OPS
namespace contrib {
namespace webgpu {
EP_SPECIFIC_USING_DECLARATIONS
} // namespace webgpu
namespace cuda {
EP_SPECIFIC_USING_DECLARATIONS
} // namespace cuda
} // namespace contrib
#endif

} // namespace onnxruntime
45 changes: 45 additions & 0 deletions include/onnxruntime/ep/adapter/allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "core/framework/allocator.h"

namespace onnxruntime {
namespace ep {
namespace adapter {

/// <summary>
/// A bridge class between the EP API OrtAllocator and an IAllocator implementation.
/// </summary>
class Allocator : public OrtAllocator {
public:
explicit Allocator(AllocatorPtr impl) : OrtAllocator{}, impl_(impl) {
version = ORT_API_VERSION;
Alloc = AllocImpl;
Free = FreeImpl;
Info = InfoImpl;
}

private:
static void* ORT_API_CALL AllocImpl(OrtAllocator* this_ptr, size_t size) noexcept {
auto* allocator = static_cast<Allocator*>(this_ptr);
return allocator->impl_->Alloc(size);
}

static void ORT_API_CALL FreeImpl(OrtAllocator* this_ptr, void* p) noexcept {
auto* allocator = static_cast<Allocator*>(this_ptr);
allocator->impl_->Free(p);
}

static const OrtMemoryInfo* ORT_API_CALL InfoImpl(const OrtAllocator* this_ptr) noexcept {
auto* allocator = static_cast<const Allocator*>(this_ptr);
return &allocator->impl_->Info();
}

AllocatorPtr impl_;
};

} // namespace adapter
} // namespace ep
} // namespace onnxruntime
54 changes: 54 additions & 0 deletions include/onnxruntime/ep/adapter/data_transfer_manager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#if !defined(ORT_EP_API_ADAPTER_HEADER_INCLUDED)
#error "This header should not be included directly. Include ep/_pch.h instead."
#endif

#include "core/common/status.h"
#include "core/common/common.h"
#include "core/framework/data_transfer.h"
#include "core/framework/tensor.h"

namespace onnxruntime {
namespace ep {
namespace adapter {

/// <summary>
/// An adapter class partially implementing the facade of `onnxruntime::DataTransferManager`.
/// </summary>
struct DataTransferManager {
/// Takes ownership of the IDataTransfer implementation that CopyTensor delegates to.
explicit DataTransferManager(std::unique_ptr<IDataTransfer> impl) : impl_{std::move(impl)} {}

Check warning on line 23 in include/onnxruntime/ep/adapter/data_transfer_manager.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <utility> for move [build/include_what_you_use] [4] Raw Output: include/onnxruntime/ep/adapter/data_transfer_manager.h:23: Add #include <utility> for move [build/include_what_you_use] [4]

/// Copies `src` into `dst` through the wrapped IDataTransfer.
/// Returns a FAIL status when the two shapes report different sizes, or when the wrapped
/// implementation reports it cannot copy between the two tensors' devices.
common::Status CopyTensor(const Tensor& src, Tensor& dst) const {
  // Reject the copy up front if the two shapes disagree on size.
  const auto src_size = src.Shape().Size();
  const auto dst_size = dst.Shape().Size();
  if (src_size != dst_size) {
    return ORT_MAKE_STATUS(ONNXRUNTIME,
                           FAIL,
                           "Tensor size mismatch: source tensor size is ",
                           src_size,
                           ", destination tensor size is ",
                           dst_size);
  }

  const auto& src_device = src.Location().device;
  const auto& dst_device = dst.Location().device;
  if (!impl_->CanCopy(src_device, dst_device)) {
    return ORT_MAKE_STATUS(ONNXRUNTIME,
                           FAIL,
                           "There's no data transfer registered for copying tensors from ",
                           src_device.ToString(),
                           " to ",
                           dst_device.ToString());
  }

  return impl_->CopyTensor(src, dst);
}

private:
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(DataTransferManager);
std::unique_ptr<IDataTransfer> impl_;

Check warning on line 49 in include/onnxruntime/ep/adapter/data_transfer_manager.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <memory> for unique_ptr<> [build/include_what_you_use] [4] Raw Output: include/onnxruntime/ep/adapter/data_transfer_manager.h:49: Add #include <memory> for unique_ptr<> [build/include_what_you_use] [4]
};

} // namespace adapter
} // namespace ep
} // namespace onnxruntime
Loading
Loading