Initial WebGPU EP checkin (microsoft#22318)

This change introduces the WebGPU EP into ONNX Runtime. To keep the PR as simple as possible, it excludes the following: - C API changes for the WebGPU EP - the actual implementation of the WebGPU EP (currently in this PR, WebGPU is a stub implementation that does not register any kernel) - Python IO Binding update - Node.js IO Binding update. This PR now contains only 43 file changes (while the working branch contains 130+), which hopefully makes it easier to review. There will be separate PRs for each of the items mentioned above. Current working branch: microsoft#21904
ishwar-raut1 · Nov 19, 2024 · e1b8ce4 · e1b8ce4
1 parent c793f68
commit e1b8ce4
Show file tree

Hide file tree

Showing 41 changed files with 748 additions and 56 deletions.
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -148,6 +148,7 @@ option(onnxruntime_TVM_USE_LLVM "Build TVM with LLVM. Set customized path to llv
 option(onnxruntime_TVM_USE_HASH "Build ipp-crypto library for support hash algorithm. It is defined for TVM only")
 option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternative math library on ARM, WebAssembly and x86." OFF)
 option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
+option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF)
 
 # Options related to reducing the binary size produced by the build
 # XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
@@ -490,6 +491,22 @@ if (onnxruntime_BUILD_CSHARP)
   endif()
 endif()
 
+if (onnxruntime_BUILD_OBJC)
+  check_language(OBJC)
+  if(CMAKE_OBJC_COMPILER)
+      enable_language(OBJC)
+  else()
+      message(FATAL_ERROR "Objective-C is not supported.")
+  endif()
+
+  check_language(OBJCXX)
+  if(CMAKE_OBJCXX_COMPILER)
+      enable_language(OBJCXX)
+  else()
+      message(FATAL_ERROR "Objective-C++ is not supported.")
+  endif()
+endif()
+
 if (NOT WIN32)
   #TODO: On Linux we may try https://github.com/microsoft/TraceLogging.git
   if (onnxruntime_ENABLE_INSTRUMENT)
@@ -917,6 +934,11 @@ if (onnxruntime_USE_WEBNN)
   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBNN=1)
   list(APPEND ONNXRUNTIME_PROVIDER_NAMES webnn)
 endif()
+if (onnxruntime_USE_WEBGPU)
+  list(APPEND ORT_PROVIDER_FLAGS -DUSE_WEBGPU=1)
+  list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBGPU=1)
+  list(APPEND ONNXRUNTIME_PROVIDER_NAMES webgpu)
+endif()
 if (onnxruntime_USE_CANN)
     list(APPEND ORT_PROVIDER_FLAGS  -DUSE_CANN=1)
     list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CANN=1)

diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
@@ -576,10 +576,11 @@ if (onnxruntime_USE_MIMALLOC)
   onnxruntime_fetchcontent_makeavailable(mimalloc)
 endif()
 
-#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto,libprotobuf, cuda/cudnn,
-# dnnl/mklml, onnxruntime_codegen_tvm, tvm and pthread
-# pthread is always at the last
-set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date ${ONNXRUNTIME_CLOG_TARGET_NAME})
+set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json
+                                   onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface
+                                   flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date
+                                   ${ONNXRUNTIME_CLOG_TARGET_NAME})
+
 # The source code of onnx_proto is generated, we must build this lib first before starting to compile the other source code that uses ONNX protobuf types.
 # The other libs do not have the problem. All the sources are already there. We can compile them in any order.
 set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto flatbuffers::flatbuffers)
@@ -634,24 +635,73 @@ if (onnxruntime_USE_COREML)
   FetchContent_Populate(coremltools)
 endif()
 
-message(STATUS "Finished fetching external dependencies")
+if (onnxruntime_USE_WEBGPU)
+  FetchContent_Declare(
+    dawn
+    URL ${DEP_URL_dawn}
+    URL_HASH SHA1=${DEP_SHA1_dawn}
+    PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch
+  )
 
-set(onnxruntime_LINK_DIRS )
+  # use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size
+  set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE)
+  set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
+  set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
+  set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE)
+  set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE)
+
+  # disable things we don't use
+  set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF)
+  set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE)
+  set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE)
+  set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF CACHE BOOL "" FORCE)
+  set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE)
+  set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE)
+  set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE)
+
+  set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+  set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE)
+  set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE)
+  set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE)
+  set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE)
+  set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE)  # don't need. disabling is a large binary size saving
+  set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE)  # needed to create cache key. runtime error if not enabled.
+
+  # SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V.
+  set(DAWN_ENABLE_SPIRV_VALIDATION OFF CACHE BOOL "" FORCE)
+
+  if (WIN32)
+    # Building this requires the HLSL writer to be enabled in Tint. TBD whether we need either of these to be ON.
+    set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE)
+    set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE)
+
+    # Vulkan may optionally be included in a Windows build. Exclude until we have an explicit use case that requires it.
+    set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE)
+  endif()
+
+  onnxruntime_fetchcontent_makeavailable(dawn)
 
+  list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native dawn::dawn_proc)
+endif()
+
+set(onnxruntime_LINK_DIRS)
 if (onnxruntime_USE_CUDA)
-      find_package(CUDAToolkit REQUIRED)
+  find_package(CUDAToolkit REQUIRED)
 
-      if(onnxruntime_CUDNN_HOME)
-        file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
-        set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
-      endif()
-      include(cuDNN)
+  if(onnxruntime_CUDNN_HOME)
+    file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
+    set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
+  endif()
+
+  include(cuDNN)
 endif()
 
 if(onnxruntime_USE_SNPE)
-    include(external/find_snpe.cmake)
-    list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS})
+  include(external/find_snpe.cmake)
+  list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS})
 endif()
 
-FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}  ORT_BINARY_DIR)
-FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR}  ORT_SOURCE_DIR)
+FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
+FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)
+
+message(STATUS "Finished fetching external dependencies")
diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake
@@ -90,10 +90,22 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK)
   # create Info.plist for the framework and podspec for CocoaPods (optional)
   set(MACOSX_FRAMEWORK_NAME "onnxruntime")
   set(MACOSX_FRAMEWORK_IDENTIFIER "com.microsoft.onnxruntime")
-  # Need to include CoreML as a weaklink for CocoaPods package if the EP is enabled
+
+  # Set up weak frameworks for macOS/iOS. They are 'weak' because the CoreML and WebGPU EPs are optionally enabled.
   if(onnxruntime_USE_COREML)
-    set(APPLE_WEAK_FRAMEWORK "\\\"CoreML\\\"")
+    list(APPEND _weak_frameworks "\\\"CoreML\\\"")
+  endif()
+
+  if(onnxruntime_USE_WEBGPU)
+    list(APPEND _weak_frameworks "\\\"QuartzCore\\\"")
+    list(APPEND _weak_frameworks "\\\"IOSurface\\\"")
+    list(APPEND _weak_frameworks "\\\"Metal\\\"")
   endif()
+
+  if (_weak_frameworks)
+    string(JOIN ", " APPLE_WEAK_FRAMEWORK ${_weak_frameworks})
+  endif()
+
   set(INFO_PLIST_PATH "${CMAKE_CURRENT_BINARY_DIR}/Info.plist")
   configure_file(${REPO_ROOT}/cmake/Info.plist.in ${INFO_PLIST_PATH})
   configure_file(
@@ -202,6 +214,7 @@ set(onnxruntime_INTERNAL_LIBRARIES
   ${PROVIDERS_RKNPU}
   ${PROVIDERS_VSINPU}
   ${PROVIDERS_XNNPACK}
+  ${PROVIDERS_WEBGPU}
   ${PROVIDERS_WEBNN}
   ${PROVIDERS_AZURE}
   ${PROVIDERS_INTERNAL_TESTING}
@@ -366,16 +379,58 @@ if(onnxruntime_BUILD_APPLE_FRAMEWORK)
     endif()
   endforeach()
 
+  # helper function that recurses to also handle static library dependencies of the ORT external libraries
+  set(_processed_libs)  # keep track of processed libraries to skip any duplicate dependencies
+  function(add_symlink_for_static_lib_and_dependencies lib)
+    function(process cur_target)
+      # de-alias if applicable so a consistent target name is used
+      get_target_property(alias ${cur_target} ALIASED_TARGET)
+      if(TARGET ${alias})
+        set(cur_target ${alias})
+      endif()
+
+      if(${cur_target} IN_LIST _processed_libs OR ${cur_target} IN_LIST lib_and_dependencies)
+        return()
+      endif()
+
+      list(APPEND lib_and_dependencies ${cur_target})
+
+      get_target_property(link_libraries ${cur_target} LINK_LIBRARIES)
+      foreach(dependency ${link_libraries})
+        if(TARGET ${dependency})
+          process(${dependency})
+        endif()
+      endforeach()
+
+      set(lib_and_dependencies ${lib_and_dependencies} PARENT_SCOPE)
+    endfunction()
+
+    set(lib_and_dependencies)
+    process(${lib})
+
+    foreach(_target ${lib_and_dependencies})
+      get_target_property(type ${_target} TYPE)
+      if(${type} STREQUAL "STATIC_LIBRARY")
+        # message(STATUS "Adding symlink for ${_target}")
+        add_custom_command(TARGET onnxruntime POST_BUILD
+                           COMMAND ${CMAKE_COMMAND} -E create_symlink
+                             $<TARGET_FILE:${_target}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_target}>)
+      endif()
+    endforeach()
+
+    list(APPEND _processed_libs ${lib_and_dependencies})
+    set(_processed_libs ${_processed_libs} PARENT_SCOPE)
+  endfunction()
+
   # for external libraries we create a symlink to the .a file
   foreach(_LIB ${onnxruntime_EXTERNAL_LIBRARIES})
-    if(NOT TARGET ${_LIB}) # if we didn't build from source. it may not a target
+    if(NOT TARGET ${_LIB}) # if we didn't build from source it may not be a target
       continue()
     endif()
+
     GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE)
     if(_LIB_TYPE STREQUAL "STATIC_LIBRARY")
-      add_custom_command(TARGET onnxruntime POST_BUILD
-                         COMMAND ${CMAKE_COMMAND} -E create_symlink
-                           $<TARGET_FILE:${_LIB}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_LIB}>)
+      add_symlink_for_static_lib_and_dependencies(${_LIB})
     endif()
   endforeach()
 

diff --git a/cmake/onnxruntime_nodejs.cmake b/cmake/onnxruntime_nodejs.cmake
@@ -67,6 +67,9 @@ endif()
 if (onnxruntime_USE_DML)
     set(NODEJS_BINDING_USE_DML "--use_dml")
 endif()
+if (onnxruntime_USE_WEBGPU)
+    set(NODEJS_BINDING_USE_WEBGPU "--use_webgpu")
+endif()
 if (onnxruntime_USE_TENSORRT)
     set(NODEJS_BINDING_USE_TENSORRT "--use_tensorrt")
 endif()
@@ -92,7 +95,7 @@ add_custom_target(js_common_npm_ci ALL
 add_custom_target(nodejs_binding_wrapper ALL
     COMMAND ${NPM_CLI} ci
     COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR}
-        --arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT}
+        --arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_WEBGPU} ${NODEJS_BINDING_USE_TENSORRT}
         ${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
     WORKING_DIRECTORY ${JS_NODE_ROOT}
     COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")

diff --git a/cmake/onnxruntime_objectivec.cmake b/cmake/onnxruntime_objectivec.cmake
@@ -9,20 +9,6 @@ if(NOT onnxruntime_BUILD_SHARED_LIB)
     message(FATAL_ERROR "The Objective-C API requires onnxruntime_BUILD_SHARED_LIB to be enabled.")
 endif()
 
-check_language(OBJC)
-if(CMAKE_OBJC_COMPILER)
-    enable_language(OBJC)
-else()
-    message(FATAL_ERROR "Objective-C is not supported.")
-endif()
-
-check_language(OBJCXX)
-if(CMAKE_OBJCXX_COMPILER)
-    enable_language(OBJCXX)
-else()
-    message(FATAL_ERROR "Objective-C++ is not supported.")
-endif()
-
 add_compile_options(
     "$<$<COMPILE_LANGUAGE:OBJC,OBJCXX>:-Wall>"
     "$<$<COMPILE_LANGUAGE:OBJC,OBJCXX>:-Wextra>")

diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
@@ -114,6 +114,9 @@ endif()
 if(onnxruntime_USE_WEBNN)
   set(PROVIDERS_WEBNN onnxruntime_providers_webnn)
 endif()
+if(onnxruntime_USE_WEBGPU)
+  set(PROVIDERS_WEBGPU onnxruntime_providers_webgpu)
+endif()
 if (onnxruntime_USE_CANN)
   set(PROVIDERS_CANN onnxruntime_providers_cann)
 endif()
@@ -155,6 +158,10 @@ if (onnxruntime_USE_WEBNN)
   include(onnxruntime_providers_webnn.cmake)
 endif()
 
+if (onnxruntime_USE_WEBGPU)
+  include(onnxruntime_providers_webgpu.cmake)
+endif()
+
 if (onnxruntime_USE_NNAPI_BUILTIN)
   include(onnxruntime_providers_nnapi.cmake)
 endif()

diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake
@@ -40,6 +40,11 @@ file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS
   "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc"
 )
 
+file(GLOB_RECURSE onnxruntime_webgpu_contrib_ops_cc_srcs CONFIGURE_DEPENDS
+  "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.h"
+  "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.cc"
+)
+
 file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
   "${ONNXRUNTIME_ROOT}/core/providers/*.h"
   "${ONNXRUNTIME_ROOT}/core/providers/*.cc"

diff --git a/cmake/onnxruntime_providers_webgpu.cmake b/cmake/onnxruntime_providers_webgpu.cmake
@@ -0,0 +1,27 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
+    message(FATAL_ERROR "WebGPU EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
+  endif()
+
+  add_compile_definitions(USE_WEBGPU=1)
+  if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+    add_definitions(-DENABLE_WEBASSEMBLY_THREADS=1)
+  endif()
+  file(GLOB_RECURSE onnxruntime_providers_webgpu_cc_srcs CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.h"
+    "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.cc"
+  )
+  if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
+    source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_webgpu_contrib_ops_cc_srcs})
+    list(APPEND onnxruntime_providers_webgpu_cc_srcs ${onnxruntime_webgpu_contrib_ops_cc_srcs})
+  endif()
+
+  source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webgpu_cc_srcs})
+  onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs})
+  onnxruntime_add_include_to_target(onnxruntime_providers_webgpu
+    onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+  target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native dawn::dawn_proc)
+
+  set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime")
diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake
@@ -178,6 +178,7 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE
     ${PROVIDERS_ACL}
     ${PROVIDERS_ARMNN}
     ${PROVIDERS_XNNPACK}
+    ${PROVIDERS_WEBGPU}
     ${PROVIDERS_AZURE}
     ${PROVIDERS_QNN}
     onnxruntime_optimizer

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
@@ -563,6 +563,10 @@ if(onnxruntime_USE_JSEP)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
 endif()
 
+if(onnxruntime_USE_WEBGPU)
+  list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
+endif()
+
 if(onnxruntime_USE_RKNPU)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_rknpu)
 endif()
@@ -608,6 +612,7 @@ set(ONNXRUNTIME_TEST_LIBS
     ${PROVIDERS_NNAPI}
     ${PROVIDERS_VSINPU}
     ${PROVIDERS_JS}
+    ${PROVIDERS_WEBGPU}
     ${PROVIDERS_QNN}
     ${PROVIDERS_SNPE}
     ${PROVIDERS_RKNPU}
@@ -670,6 +675,13 @@ if(onnxruntime_USE_JSEP)
   list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_js)
 endif()
 
+if(onnxruntime_USE_WEBGPU)
+  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/webgpu/*)
+  list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_webgpu)
+  list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
+  list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_webgpu)
+endif()
+
 # QNN EP tests require CPU EP op implementations for accuracy evaluation, so disable on minimal
 # or reduced op builds.
 if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)