Skip to content

Commit

Permalink
Initial WebGPU EP checkin (microsoft#22318)
Browse files Browse the repository at this point in the history
This change introduces the WebGPU EP into ONNX Runtime.

To make the PR as simple as possible, this PR excluded the following:
- C API changes for WebGPU EP
- actual implementation of WebGPU EP. Currently in this PR, WebGPU is a
stub implementation that does not register any kernel.
- Python IO Binding update
- Node.js IO Binding update

This PR now contains only 43 file changes (while the working branch
contains 130+) and hopefully this makes it easier to review.

There is going to be separated PRs for each mentioned above.

Current working branch: microsoft#21904
  • Loading branch information
fs-eire authored and Ishwar Raut committed Nov 19, 2024
1 parent c793f68 commit e1b8ce4
Show file tree
Hide file tree
Showing 41 changed files with 748 additions and 56 deletions.
22 changes: 22 additions & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ option(onnxruntime_TVM_USE_LLVM "Build TVM with LLVM. Set customized path to llv
option(onnxruntime_TVM_USE_HASH "Build ipp-crypto library for support hash algorithm. It is defined for TVM only")
option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternative math library on ARM, WebAssembly and x86." OFF)
option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF)

# Options related to reducing the binary size produced by the build
# XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
Expand Down Expand Up @@ -490,6 +491,22 @@ if (onnxruntime_BUILD_CSHARP)
endif()
endif()

if (onnxruntime_BUILD_OBJC)
check_language(OBJC)
if(CMAKE_OBJC_COMPILER)
enable_language(OBJC)
else()
message(FATAL_ERROR "Objective-C is not supported.")
endif()

check_language(OBJCXX)
if(CMAKE_OBJCXX_COMPILER)
enable_language(OBJCXX)
else()
message(FATAL_ERROR "Objective-C++ is not supported.")
endif()
endif()

if (NOT WIN32)
#TODO: On Linux we may try https://github.com/microsoft/TraceLogging.git
if (onnxruntime_ENABLE_INSTRUMENT)
Expand Down Expand Up @@ -917,6 +934,11 @@ if (onnxruntime_USE_WEBNN)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBNN=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES webnn)
endif()
if (onnxruntime_USE_WEBGPU)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_WEBGPU=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBGPU=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES webgpu)
endif()
if (onnxruntime_USE_CANN)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_CANN=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CANN=1)
Expand Down
82 changes: 66 additions & 16 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,11 @@ if (onnxruntime_USE_MIMALLOC)
onnxruntime_fetchcontent_makeavailable(mimalloc)
endif()

#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto,libprotobuf, cuda/cudnn,
# dnnl/mklml, onnxruntime_codegen_tvm, tvm and pthread
# pthread is always at the last
set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date ${ONNXRUNTIME_CLOG_TARGET_NAME})
set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json
onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface
flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date
${ONNXRUNTIME_CLOG_TARGET_NAME})

# The source code of onnx_proto is generated, we must build this lib first before starting to compile the other source code that uses ONNX protobuf types.
# The other libs do not have the problem. All the sources are already there. We can compile them in any order.
set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto flatbuffers::flatbuffers)
Expand Down Expand Up @@ -634,24 +635,73 @@ if (onnxruntime_USE_COREML)
FetchContent_Populate(coremltools)
endif()

message(STATUS "Finished fetching external dependencies")
if (onnxruntime_USE_WEBGPU)
FetchContent_Declare(
dawn
URL ${DEP_URL_dawn}
URL_HASH SHA1=${DEP_SHA1_dawn}
PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch
)

set(onnxruntime_LINK_DIRS )
# use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size
set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE)
set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE)
set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE)

# disable things we don't use
set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF)
set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE)
set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF CACHE BOOL "" FORCE)
set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE)
set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE)
set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE)

set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. disabling is a large binary size saving
set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key. runtime error if not enabled.

# SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V.
set(DAWN_ENABLE_SPIRV_VALIDATION OFF CACHE BOOL "" FORCE)

if (WIN32)
# building this requires the HLSL writer to be enabled in Tint. TBD if that we need either of these to be ON.
set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE)
set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE)

# Vulkan may optionally be included in a Windows build. Exclude until we have an explicit use case that requires it.
set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE)
endif()

onnxruntime_fetchcontent_makeavailable(dawn)

list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native dawn::dawn_proc)
endif()

set(onnxruntime_LINK_DIRS)
if (onnxruntime_USE_CUDA)
find_package(CUDAToolkit REQUIRED)
find_package(CUDAToolkit REQUIRED)

if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
endif()
include(cuDNN)
if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
endif()

include(cuDNN)
endif()

if(onnxruntime_USE_SNPE)
include(external/find_snpe.cmake)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS})
include(external/find_snpe.cmake)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS})
endif()

FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)
FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)

message(STATUS "Finished fetching external dependencies")
67 changes: 61 additions & 6 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,22 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK)
# create Info.plist for the framework and podspec for CocoaPods (optional)
set(MACOSX_FRAMEWORK_NAME "onnxruntime")
set(MACOSX_FRAMEWORK_IDENTIFIER "com.microsoft.onnxruntime")
# Need to include CoreML as a weaklink for CocoaPods package if the EP is enabled

# Setup weak frameworks for macOS/iOS. 'weak' as the CoreML or WebGPU EPs are optionally enabled.
if(onnxruntime_USE_COREML)
set(APPLE_WEAK_FRAMEWORK "\\\"CoreML\\\"")
list(APPEND _weak_frameworks "\\\"CoreML\\\"")
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND _weak_frameworks "\\\"QuartzCore\\\"")
list(APPEND _weak_frameworks "\\\"IOSurface\\\"")
list(APPEND _weak_frameworks "\\\"Metal\\\"")
endif()

if (_weak_frameworks)
string(JOIN ", " APPLE_WEAK_FRAMEWORK ${_weak_frameworks})
endif()

set(INFO_PLIST_PATH "${CMAKE_CURRENT_BINARY_DIR}/Info.plist")
configure_file(${REPO_ROOT}/cmake/Info.plist.in ${INFO_PLIST_PATH})
configure_file(
Expand Down Expand Up @@ -202,6 +214,7 @@ set(onnxruntime_INTERNAL_LIBRARIES
${PROVIDERS_RKNPU}
${PROVIDERS_VSINPU}
${PROVIDERS_XNNPACK}
${PROVIDERS_WEBGPU}
${PROVIDERS_WEBNN}
${PROVIDERS_AZURE}
${PROVIDERS_INTERNAL_TESTING}
Expand Down Expand Up @@ -366,16 +379,58 @@ if(onnxruntime_BUILD_APPLE_FRAMEWORK)
endif()
endforeach()

# helper function that recurses to also handle static library dependencies of the ORT external libraries
set(_processed_libs) # keep track of processed libraries to skip any duplicate dependencies
function(add_symlink_for_static_lib_and_dependencies lib)
function(process cur_target)
# de-alias if applicable so a consistent target name is used
get_target_property(alias ${cur_target} ALIASED_TARGET)
if(TARGET ${alias})
set(cur_target ${alias})
endif()

if(${cur_target} IN_LIST _processed_libs OR ${cur_target} IN_LIST lib_and_dependencies)
return()
endif()

list(APPEND lib_and_dependencies ${cur_target})

get_target_property(link_libraries ${cur_target} LINK_LIBRARIES)
foreach(dependency ${link_libraries})
if(TARGET ${dependency})
process(${dependency})
endif()
endforeach()

set(lib_and_dependencies ${lib_and_dependencies} PARENT_SCOPE)
endfunction()

set(lib_and_dependencies)
process(${lib})

foreach(_target ${lib_and_dependencies})
get_target_property(type ${_target} TYPE)
if(${type} STREQUAL "STATIC_LIBRARY")
# message(STATUS "Adding symlink for ${_target}")
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
$<TARGET_FILE:${_target}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_target}>)
endif()
endforeach()

list(APPEND _processed_libs ${lib_and_dependencies})
set(_processed_libs ${_processed_libs} PARENT_SCOPE)
endfunction()

# for external libraries we create a symlink to the .a file
foreach(_LIB ${onnxruntime_EXTERNAL_LIBRARIES})
if(NOT TARGET ${_LIB}) # if we didn't build from source. it may not a target
if(NOT TARGET ${_LIB}) # if we didn't build from source it may not be a target
continue()
endif()

GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE)
if(_LIB_TYPE STREQUAL "STATIC_LIBRARY")
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
$<TARGET_FILE:${_LIB}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_LIB}>)
add_symlink_for_static_lib_and_dependencies(${_LIB})
endif()
endforeach()

Expand Down
5 changes: 4 additions & 1 deletion cmake/onnxruntime_nodejs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ endif()
if (onnxruntime_USE_DML)
set(NODEJS_BINDING_USE_DML "--use_dml")
endif()
if (onnxruntime_USE_WEBGPU)
set(NODEJS_BINDING_USE_WEBGPU "--use_webgpu")
endif()
if (onnxruntime_USE_TENSORRT)
set(NODEJS_BINDING_USE_TENSORRT "--use_tensorrt")
endif()
Expand All @@ -92,7 +95,7 @@ add_custom_target(js_common_npm_ci ALL
add_custom_target(nodejs_binding_wrapper ALL
COMMAND ${NPM_CLI} ci
COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR}
--arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT}
--arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_WEBGPU} ${NODEJS_BINDING_USE_TENSORRT}
${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
WORKING_DIRECTORY ${JS_NODE_ROOT}
COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")
Expand Down
14 changes: 0 additions & 14 deletions cmake/onnxruntime_objectivec.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,6 @@ if(NOT onnxruntime_BUILD_SHARED_LIB)
message(FATAL_ERROR "The Objective-C API requires onnxruntime_BUILD_SHARED_LIB to be enabled.")
endif()

check_language(OBJC)
if(CMAKE_OBJC_COMPILER)
enable_language(OBJC)
else()
message(FATAL_ERROR "Objective-C is not supported.")
endif()

check_language(OBJCXX)
if(CMAKE_OBJCXX_COMPILER)
enable_language(OBJCXX)
else()
message(FATAL_ERROR "Objective-C++ is not supported.")
endif()

add_compile_options(
"$<$<COMPILE_LANGUAGE:OBJC,OBJCXX>:-Wall>"
"$<$<COMPILE_LANGUAGE:OBJC,OBJCXX>:-Wextra>")
Expand Down
7 changes: 7 additions & 0 deletions cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ endif()
if(onnxruntime_USE_WEBNN)
set(PROVIDERS_WEBNN onnxruntime_providers_webnn)
endif()
if(onnxruntime_USE_WEBGPU)
set(PROVIDERS_WEBGPU onnxruntime_providers_webgpu)
endif()
if (onnxruntime_USE_CANN)
set(PROVIDERS_CANN onnxruntime_providers_cann)
endif()
Expand Down Expand Up @@ -155,6 +158,10 @@ if (onnxruntime_USE_WEBNN)
include(onnxruntime_providers_webnn.cmake)
endif()

if (onnxruntime_USE_WEBGPU)
include(onnxruntime_providers_webgpu.cmake)
endif()

if (onnxruntime_USE_NNAPI_BUILTIN)
include(onnxruntime_providers_nnapi.cmake)
endif()
Expand Down
5 changes: 5 additions & 0 deletions cmake/onnxruntime_providers_cpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc"
)

file(GLOB_RECURSE onnxruntime_webgpu_contrib_ops_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.h"
"${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.cc"
)

file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/*.cc"
Expand Down
27 changes: 27 additions & 0 deletions cmake/onnxruntime_providers_webgpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
message(FATAL_ERROR "WebGPU EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
endif()

add_compile_definitions(USE_WEBGPU=1)
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
add_definitions(-DENABLE_WEBASSEMBLY_THREADS=1)
endif()
file(GLOB_RECURSE onnxruntime_providers_webgpu_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.cc"
)
if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_webgpu_contrib_ops_cc_srcs})
list(APPEND onnxruntime_providers_webgpu_cc_srcs ${onnxruntime_webgpu_contrib_ops_cc_srcs})
endif()

source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webgpu_cc_srcs})
onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs})
onnxruntime_add_include_to_target(onnxruntime_providers_webgpu
onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native dawn::dawn_proc)

set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime")
1 change: 1 addition & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE
${PROVIDERS_ACL}
${PROVIDERS_ARMNN}
${PROVIDERS_XNNPACK}
${PROVIDERS_WEBGPU}
${PROVIDERS_AZURE}
${PROVIDERS_QNN}
onnxruntime_optimizer
Expand Down
12 changes: 12 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,10 @@ if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
endif()

if(onnxruntime_USE_RKNPU)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_rknpu)
endif()
Expand Down Expand Up @@ -608,6 +612,7 @@ set(ONNXRUNTIME_TEST_LIBS
${PROVIDERS_NNAPI}
${PROVIDERS_VSINPU}
${PROVIDERS_JS}
${PROVIDERS_WEBGPU}
${PROVIDERS_QNN}
${PROVIDERS_SNPE}
${PROVIDERS_RKNPU}
Expand Down Expand Up @@ -670,6 +675,13 @@ if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_js)
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/webgpu/*)
list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_webgpu)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_webgpu)
endif()

# QNN EP tests require CPU EP op implementations for accuracy evaluation, so disable on minimal
# or reduced op builds.
if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
Expand Down
Loading

0 comments on commit e1b8ce4

Please sign in to comment.