Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
82c8e56
upgrade action shellcheck to v1.30.0 (#24304)
fs-eire Apr 4, 2025
1cb53d0
[QNN-EP] Fix ONNX context model helper. (#24271)
minfhong-quic Apr 4, 2025
318cc87
[WebGPU] fix Pad cache key (#24305)
fs-eire Apr 4, 2025
56f1018
Bump vite from 6.2.4 to 6.2.5 in /js/web/test/e2e/exports/testcases/v…
dependabot[bot] Apr 4, 2025
2e94c5a
[WebGPU] fix cache key of AttentionProbs/VxAttentionScore (#24309)
fs-eire Apr 4, 2025
e944379
Support Gemma3 with Clip fused attention (#24280)
titaiwangms Apr 4, 2025
11fda2a
Update packaging pipeline for Nodejs binding (#24301)
fs-eire Apr 4, 2025
a4976e3
Add support for uint8_t as data type for GatherBlockQuantized (#24239)
sushraja-msft Apr 4, 2025
9102aae
[Native WebGPU] Add Conv, ConTranspose and FusedConv (#24186)
satyajandhyala Apr 4, 2025
a7e62d6
[webgpu][dawn API optimization] reduce number of calls to wgpuDeviceG…
fs-eire Apr 4, 2025
55c1a3b
Fix 'minimal_power' to 'minimum_power' for DirectML performance selec…
virajwad Apr 5, 2025
d6df4f2
Add ConvTranspose cache key (#24317)
satyajandhyala Apr 5, 2025
a1186f6
[webgpu] Use 1D dispatch groups for attention (#24228)
qjia7 Apr 7, 2025
73676fc
[webgpu][dawn API optimization] reduce number of calls to buffer APIs…
fs-eire Apr 7, 2025
350d140
Implement load cancellation ability (#24257)
yuslepukhin Apr 7, 2025
ca1b32d
[webgpu] Fix ROUND_PREFER_CEIL issue of Resize operator (#24229)
xhcao Apr 7, 2025
b803429
[Native WebGPU] Exclude WebGPU EP from ConvFp16 3D tests. (#24327)
satyajandhyala Apr 7, 2025
554fb4a
[VitisAI EP] export InferShapes to VitisAIEP (#23881)
zz002 Apr 8, 2025
8517c64
Merge branch 'master' into syncing_msft_8_4_25
saurabhkale17 Apr 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/webgpu-validate-shader-key/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ runs:
working-directory: ${{ github.action_path }}

- name: Validate shader keys (native log)
if: ${{ !inputs.is_chromium_log != 'true' }}
if: ${{ inputs.is_chromium_log != 'true' }}
shell: cmd
run: |
node validate-shader-key.js < "${{ inputs.log_file_path }}"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
level: info
filter_mode: diff_context
- name: shellcheck # Static check shell scripts
uses: reviewdog/action-shellcheck@v1
uses: reviewdog/action-shellcheck@v1.30.0
with:
github_token: ${{ secrets.github_token }}
reporter: github-pr-check
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/windows-web-ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,6 @@ jobs:

- name: Validate shader keys - WebGPU EP
if: ${{ inputs.run_webgpu_tests == true && inputs.build_config == 'Debug' }}
continue-on-error: true
uses: ./.github/actions/webgpu-validate-shader-key
with:
log_file_path: ${{ runner.temp }}\web\test\07\chrome_debug.log
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/windows_webgpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ jobs:
dir ${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\onnxruntime_test_all_stderr.log

- name: Validate shader keys
continue-on-error: true
uses: ./.github/actions/webgpu-validate-shader-key
with:
log_file_path: ${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\onnxruntime_test_all_stderr.log
Expand Down
18 changes: 18 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System;
using System.Runtime.InteropServices;
using static Microsoft.ML.OnnxRuntime.NativeMethods;

namespace Microsoft.ML.OnnxRuntime
{
Expand Down Expand Up @@ -325,6 +326,16 @@ public struct OrtApi
public IntPtr CreateLoraAdapterFromArray;
public IntPtr ReleaseLoraAdapter;
public IntPtr RunOptionsAddActiveLoraAdapter;
public IntPtr SetEpDynamicOptions;
public IntPtr ReleaseValueInfo;
public IntPtr ReleaseNode;
public IntPtr ReleaseGraph;
public IntPtr ReleaseModel;
public IntPtr GetValueInfoName;
public IntPtr GetValueInfoTypeInfo;
public IntPtr GetModelEditorApi;
public IntPtr CreateTensorWithDataAndDeleterAsOrtValue;
public IntPtr SessionOptionsSetLoadCancellationFlag;
}

internal static class NativeMethods
Expand Down Expand Up @@ -404,6 +415,7 @@ static NativeMethods()
OrtReleaseSessionOptions = (DOrtReleaseSessionOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseSessionOptions, typeof(DOrtReleaseSessionOptions));
OrtCloneSessionOptions = (DOrtCloneSessionOptions)Marshal.GetDelegateForFunctionPointer(api_.CloneSessionOptions, typeof(DOrtCloneSessionOptions));
OrtSetSessionExecutionMode = (DOrtSetSessionExecutionMode)Marshal.GetDelegateForFunctionPointer(api_.SetSessionExecutionMode, typeof(DOrtSetSessionExecutionMode));
OrtSessionOptionsSetLoadCancellationFlag = (DOrtSessionOptionsSetLoadCancellationFlag)Marshal.GetDelegateForFunctionPointer(api_.SessionOptionsSetLoadCancellationFlag, typeof(DOrtSessionOptionsSetLoadCancellationFlag));
OrtSetOptimizedModelFilePath = (DOrtSetOptimizedModelFilePath)Marshal.GetDelegateForFunctionPointer(api_.SetOptimizedModelFilePath, typeof(DOrtSetOptimizedModelFilePath));
OrtEnableProfiling = (DOrtEnableProfiling)Marshal.GetDelegateForFunctionPointer(api_.EnableProfiling, typeof(DOrtEnableProfiling));
OrtDisableProfiling = (DOrtDisableProfiling)Marshal.GetDelegateForFunctionPointer(api_.DisableProfiling, typeof(DOrtDisableProfiling));
Expand Down Expand Up @@ -1025,6 +1037,12 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
ExecutionMode execution_mode);
public static DOrtSetSessionExecutionMode OrtSetSessionExecutionMode;

[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionOptionsSetLoadCancellationFlag(IntPtr /*(OrtSessionOptions*)*/ options,
bool value);
public static DOrtSessionOptionsSetLoadCancellationFlag OrtSessionOptionsSetLoadCancellationFlag;


[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /*(OrtStatus*)*/ DOrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, byte[] optimizedModelFilepath);
public static DOrtSetOptimizedModelFilePath OrtSetOptimizedModelFilePath;
Expand Down
10 changes: 10 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,16 @@ public ExecutionMode ExecutionMode
}
private ExecutionMode _executionMode = ExecutionMode.ORT_SEQUENTIAL;

/// <summary>
/// Sets the load cancellation flag for the session. Default is set to false.
/// Provides an opportunity for the user to cancel model loading.
/// </summary>
/// <param name="value">true to request cancellation, false to proceed</param>
public void SetLoadCancellationFlag(bool value)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsSetLoadCancellationFlag(handle, value));
}

#endregion

#region Private Methods
Expand Down
5 changes: 3 additions & 2 deletions docs/ContribOperators.md
Original file line number Diff line number Diff line change
Expand Up @@ -2039,10 +2039,11 @@ This version of the operator has been available since version 1 of the 'com.micr
1. Input `data` is a constant. It is quantized block-wise along attribute `quantize_axis` with block size specified by attribute `block_size`.
`block_size` must be a power of 2 and not smaller than 16, e.g. 16, 32, 64, 128, ...
2. Input `data`'s scale and zero point are specified by input `scales` and `zero_points`. `scales` and `zero_points` are also constants.
If `zero_points` is not provided, 0 is the zero point.
If `zero_points` is not provided, the default zero point is 0, except when `data` is of uint8 type, in which case the default zero point is 8.
3. During the op execution, `data` and `indices` are first used to generate the quantized output. Then, `scales` and `zero_points` are used
to dequantize the output.
4. The `output` and `scales` have the same type. The `data` and `zero_points` have the same type.
5. For uint8 data, the `gather_axis` must be 0.

#### Version

Expand Down Expand Up @@ -2082,7 +2083,7 @@ This version of the operator has been available since version 1 of the 'com.micr
#### Type Constraints

<dl>
<dt><tt>T1</tt> : tensor(int4), tensor(uint4)</dt>
<dt><tt>T1</tt> : tensor(int4), tensor(uint4), tensor(uint8)</dt>
<dd>Constrain quantized types.</dd>
<dt><tt>T2</tt> : tensor(float), tensor(float16), tensor(bfloat16)</dt>
<dd>Constrain dequantized types.</dd>
Expand Down
2 changes: 1 addition & 1 deletion docs/OperatorKernels.md
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ Do not modify directly.*
|FusedConv|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *in* Z:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|FusedGemm|*in* A:**T**<br> *in* B:**T**<br> *in* C:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|FusedMatMul|*in* A:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|GatherBlockQuantized|*in* data:**T1**<br> *in* indices:**Tind**<br> *in* scales:**T2**<br> *in* zero_points:**T1**<br> *out* output:**T2**|1+|**T1** = tensor(int4), tensor(uint4)<br/> **T2** = tensor(float), tensor(float16)<br/> **Tind** = tensor(int32), tensor(int64)|
|GatherBlockQuantized|*in* data:**T1**<br> *in* indices:**Tind**<br> *in* scales:**T2**<br> *in* zero_points:**T1**<br> *out* output:**T2**|1+|**T1** = tensor(int4), tensor(uint4), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)<br/> **Tind** = tensor(int32), tensor(int64)|
|GatherND|*in* data:**T**<br> *in* indices:**Tind**<br> *out* output:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)<br/> **Tind** = tensor(int32), tensor(int64)|
|Gelu|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|GreedySearch|*in* input_ids:**I**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**I**<br> *in* prefix_vocab_mask:**I**<br> *in* attention_mask:**I**<br> *out* sequences:**I**|1+|**T** = tensor(float)|
Expand Down
32 changes: 31 additions & 1 deletion include/onnxruntime/core/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,26 @@ void LogRuntimeError(uint32_t session_id, const common::Status& status, const ch
abort(); \
} while (false)

#define ORT_THROW_FROM_STATUS(status) \
do { \
::onnxruntime::PrintFinalMessage( \
::onnxruntime::OnnxRuntimeException( \
ORT_WHERE_WITH_STACK, status.ToString()) \
.what()); \
abort(); \
} while (false)

#define ORT_THROW_WITH_CATEGORY_AND_CODE(category, code, ...) \
do { \
::onnxruntime::PrintFinalMessage( \
::onnxruntime::OnnxRuntimeException(ORT_WHERE_WITH_STACK, \
::onnxruntime::MakeString(__VA_ARGS__), \
::onnxruntime::common::category, \
::onnxruntime::common::code) \
.what()); \
abort(); \
} while (false)

#else

#define ORT_TRY try
Expand Down Expand Up @@ -180,6 +200,16 @@ void LogRuntimeError(uint32_t session_id, const common::Status& status, const ch
#define ORT_THROW_EX(ex, ...) \
throw ex(__VA_ARGS__)

#define ORT_THROW_FROM_STATUS(status) \
throw ::onnxruntime::OnnxRuntimeException(ORT_WHERE_WITH_STACK, status.ToString(), status.Category(), \
static_cast<::onnxruntime::common::StatusCode>(status.Code()))

#define ORT_THROW_WITH_CATEGORY_AND_CODE(category, code, ...) \
throw ::onnxruntime::OnnxRuntimeException(ORT_WHERE_WITH_STACK, \
::onnxruntime::MakeString(__VA_ARGS__), \
::onnxruntime::common::category, \
::onnxruntime::common::code)

#endif

#define ORT_MAKE_STATUS(category, code, ...) \
Expand Down Expand Up @@ -237,7 +267,7 @@ void LogRuntimeError(uint32_t session_id, const common::Status& status, const ch
auto _status = (expr); \
if ((!_status.IsOK())) { \
::onnxruntime::LogRuntimeError(0, _status, __FILE__, static_cast<const char*>(__FUNCTION__), __LINE__); \
ORT_THROW(_status); \
ORT_THROW_FROM_STATUS(_status); \
} \
} while (0)

Expand Down
47 changes: 45 additions & 2 deletions include/onnxruntime/core/common/exceptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <vector>

#include "core/common/common.h"
#include "core/common/status.h"
#include "core/common/code_location.h"

namespace onnxruntime {
Expand All @@ -35,12 +36,44 @@ class OnnxRuntimeException : public std::exception {
/**
Create a new exception that captures the location it was thrown from.
@param location Location in the source code the exception is being thrown from
@param msg Message containing additional information about the exception cause.
@param category Error category
@param code Error code
*/

OnnxRuntimeException(const CodeLocation& location,
const std::string& message,
common::StatusCategory category,
common::StatusCode code) noexcept
: OnnxRuntimeException(location, nullptr, message, category, code) {
}

/**
Create a new exception that captures the location it was thrown from.
The instance will be created with ONNXRUNTIME category and FAIL code.
@param location Location in the source code the exception is being thrown from
@param failed_condition Optional string containing the condition that failed.
e.g. "tensor.Size() == input.Size()". May be nullptr.
@param msg Message containing additional information about the exception cause.
*/
OnnxRuntimeException(const CodeLocation& location, const char* failed_condition, const std::string& msg)
: location_{location} {
OnnxRuntimeException(const CodeLocation& location, const char* failed_condition, const std::string& msg) noexcept
: OnnxRuntimeException(location, failed_condition, msg,
common::StatusCategory::ONNXRUNTIME, common::StatusCode::FAIL) {
}

/**
Create a new exception that captures the location it was thrown from.
@param location Location in the source code the exception is being thrown from
@param failed_condition Optional string containing the condition that failed.
e.g. "tensor.Size() == input.Size()". May be nullptr.
@param msg Message containing additional information about the exception cause.
@param category Error category
@param code Error code
*/
OnnxRuntimeException(const CodeLocation& location, const char* failed_condition, const std::string& msg,
common::StatusCategory category,
common::StatusCode code)
: location_{location}, category_(category), code_(code) {
std::ostringstream ss;

ss << location.ToString(CodeLocation::kFilenameAndPath); // output full path in case just the filename is ambiguous
Expand All @@ -58,6 +91,14 @@ class OnnxRuntimeException : public std::exception {
what_ = ss.str();
}

common::StatusCategory Category() const noexcept {
return category_;
}

common::StatusCode Code() const noexcept {
return code_;
}

const char* what() const noexcept override {
return what_.c_str();
}
Expand All @@ -66,6 +107,8 @@ class OnnxRuntimeException : public std::exception {
const CodeLocation location_;
const std::vector<std::string> stacktrace_;
std::string what_;
common::StatusCategory category_;
common::StatusCode code_;
};

} // namespace onnxruntime
7 changes: 6 additions & 1 deletion include/onnxruntime/core/common/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
MODEL_LOADED = 8,
NOT_IMPLEMENTED = 9,
INVALID_GRAPH = 10,
EP_FAIL = 11
EP_FAIL = 11,
MODEL_LOAD_CANCELED = 12,
};

constexpr const char* StatusCodeToString(StatusCode status) noexcept {
Expand Down Expand Up @@ -72,6 +73,8 @@
return "INVALID_GRAPH";
case StatusCode::EP_FAIL:
return "EP_FAIL";
case StatusCode::MODEL_LOAD_CANCELED:
return "MODEL_LOAD_CANCELED";
default:
return "GENERAL ERROR";
}
Expand Down Expand Up @@ -104,6 +107,8 @@
return HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT);
case StatusCode::EP_FAIL:
return HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR);
case StatusCode::MODEL_LOAD_CANCELED:
return HRESULT_FROM_WIN32(ERROR_CANCELLED);

Check notice on line 111 in include/onnxruntime/core/common/status.h

View workflow job for this annotation

GitHub Actions / misspell

[misspell] include/onnxruntime/core/common/status.h#L111

"CANCELLED" is a misspelling of "CANCELED"
Raw output
./include/onnxruntime/core/common/status.h:111:38: "CANCELLED" is a misspelling of "CANCELED"
default:
return E_FAIL;
}
Expand Down
19 changes: 19 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ typedef enum OrtErrorCode {
ORT_NOT_IMPLEMENTED,
ORT_INVALID_GRAPH,
ORT_EP_FAIL,
ORT_MODEL_LOAD_CANCELED,
} OrtErrorCode;

typedef enum OrtOpAttrType {
Expand Down Expand Up @@ -4898,6 +4899,24 @@ struct OrtApi {
_In_ const int64_t* shape, size_t shape_len,
ONNXTensorElementDataType type,
_Outptr_ OrtValue** out);

/** \brief Sets the load cancellation flag to abort the session loading process.
*
* \param[in] options instance that was passed to the session at creation time.
* \param[in] cancel Setting this to true after the model loading process has been initiated will
* attempt to cancel loading. If cancellation is successful, CreateSession(),
* CreateSessionFromArray(), or any other session creation API that takes session options as an
* argument will return an OrtStatus indicating that session loading was canceled at the user's request,
* with error code ORT_MODEL_LOAD_CANCELED.
* The APIs above will not return any valid Session instance. Cancellation is best-effort and the result
* is not guaranteed. The session may have already been created and initialized
* before the cancellation request was issued.
*
* \snippet{doc} snippets.dox OrtStatus
*
*/
ORT_API2_STATUS(SessionOptionsSetLoadCancellationFlag, _Inout_ OrtSessionOptions* options,
_In_ bool cancel);
};

/*
Expand Down
2 changes: 2 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,8 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {

SessionOptionsImpl& SetExecutionMode(ExecutionMode execution_mode); ///< Wraps OrtApi::SetSessionExecutionMode

SessionOptionsImpl& SetLoadCancellationFlag(bool value); ///< Wraps OrtApi::SessionOptionsSetLoadCancellationFlag

SessionOptionsImpl& SetLogId(const char* logid); ///< Wraps OrtApi::SetSessionLogId
SessionOptionsImpl& SetLogSeverityLevel(int level); ///< Wraps OrtApi::SetSessionLogSeverityLevel

Expand Down
6 changes: 6 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,12 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetExecutionMode(ExecutionM
return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetLoadCancellationFlag(bool value) {
ThrowOnError(GetApi().SessionOptionsSetLoadCancellationFlag(this->p_, value));
return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetLogId(const char* logid) {
ThrowOnError(GetApi().SetSessionLogId(this->p_, logid));
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
},
"devDependencies": {
"@vitejs/plugin-vue": "^5.2.1",
"vite": "^6.2.4"
"vite": "^6.2.5"
}
}
Loading
Loading