Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
82c8e56
upgrade action shellcheck to v1.30.0 (#24304)
fs-eire Apr 4, 2025
1cb53d0
[QNN-EP] Fix ONNX context model helper. (#24271)
minfhong-quic Apr 4, 2025
318cc87
[WebGPU] fix Pad cache key (#24305)
fs-eire Apr 4, 2025
56f1018
Bump vite from 6.2.4 to 6.2.5 in /js/web/test/e2e/exports/testcases/v…
dependabot[bot] Apr 4, 2025
2e94c5a
[WebGPU] fix cache key of AttentionProbs/VxAttentionScore (#24309)
fs-eire Apr 4, 2025
e944379
Support Gemma3 with Clip fused attention (#24280)
titaiwangms Apr 4, 2025
11fda2a
Update packaging pipeline for Nodejs binding (#24301)
fs-eire Apr 4, 2025
a4976e3
Add support for uint8_t as data type for GatherBlockQuantized (#24239)
sushraja-msft Apr 4, 2025
9102aae
[Native WebGPU] Add Conv, ConTranspose and FusedConv (#24186)
satyajandhyala Apr 4, 2025
a7e62d6
[webgpu][dawn API optimization] reduce number of calls to wgpuDeviceG…
fs-eire Apr 4, 2025
55c1a3b
Fix 'minimal_power' to 'minimum_power' for DirectML performance selec…
virajwad Apr 5, 2025
d6df4f2
Add ConvTranspose cache key (#24317)
satyajandhyala Apr 5, 2025
a1186f6
[webgpu] Use 1D dispatch groups for attention (#24228)
qjia7 Apr 7, 2025
73676fc
[webgpu][dawn API optimization] reduce number of calls to buffer APIs…
fs-eire Apr 7, 2025
350d140
Implement load cancellation ability (#24257)
yuslepukhin Apr 7, 2025
ca1b32d
[webgpu] Fix ROUND_PREFER_CEIL issue of Resize operator (#24229)
xhcao Apr 7, 2025
b803429
[Native WebGPU] Exclude WebGPU EP from ConvFp16 3D tests. (#24327)
satyajandhyala Apr 7, 2025
554fb4a
[VitisAI EP] export InferShapes to VitisAIEP (#23881)
zz002 Apr 8, 2025
8517c64
Merge branch 'master' into syncing_msft_8_4_25
saurabhkale17 Apr 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/webgpu-validate-shader-key/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ runs:
working-directory: ${{ github.action_path }}

- name: Validate shader keys (native log)
if: ${{ !inputs.is_chromium_log != 'true' }}
if: ${{ inputs.is_chromium_log != 'true' }}
shell: cmd
run: |
node validate-shader-key.js < "${{ inputs.log_file_path }}"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
level: info
filter_mode: diff_context
- name: shellcheck # Static check shell scripts
uses: reviewdog/action-shellcheck@v1
uses: reviewdog/action-shellcheck@v1.30.0
with:
github_token: ${{ secrets.github_token }}
reporter: github-pr-check
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/windows-web-ci-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,6 @@ jobs:

- name: Validate shader keys - WebGPU EP
if: ${{ inputs.run_webgpu_tests == true && inputs.build_config == 'Debug' }}
continue-on-error: true
uses: ./.github/actions/webgpu-validate-shader-key
with:
log_file_path: ${{ runner.temp }}\web\test\07\chrome_debug.log
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/windows_webgpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ jobs:
dir ${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\onnxruntime_test_all_stderr.log

- name: Validate shader keys
continue-on-error: true
uses: ./.github/actions/webgpu-validate-shader-key
with:
log_file_path: ${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\onnxruntime_test_all_stderr.log
Expand Down
18 changes: 18 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System;
using System.Runtime.InteropServices;
using static Microsoft.ML.OnnxRuntime.NativeMethods;

namespace Microsoft.ML.OnnxRuntime
{
Expand Down Expand Up @@ -325,6 +326,16 @@ public struct OrtApi
public IntPtr CreateLoraAdapterFromArray;
public IntPtr ReleaseLoraAdapter;
public IntPtr RunOptionsAddActiveLoraAdapter;
public IntPtr SetEpDynamicOptions;
public IntPtr ReleaseValueInfo;
public IntPtr ReleaseNode;
public IntPtr ReleaseGraph;
public IntPtr ReleaseModel;
public IntPtr GetValueInfoName;
public IntPtr GetValueInfoTypeInfo;
public IntPtr GetModelEditorApi;
public IntPtr CreateTensorWithDataAndDeleterAsOrtValue;
public IntPtr SessionOptionsSetLoadCancellationFlag;
}

internal static class NativeMethods
Expand Down Expand Up @@ -404,6 +415,7 @@ static NativeMethods()
OrtReleaseSessionOptions = (DOrtReleaseSessionOptions)Marshal.GetDelegateForFunctionPointer(api_.ReleaseSessionOptions, typeof(DOrtReleaseSessionOptions));
OrtCloneSessionOptions = (DOrtCloneSessionOptions)Marshal.GetDelegateForFunctionPointer(api_.CloneSessionOptions, typeof(DOrtCloneSessionOptions));
OrtSetSessionExecutionMode = (DOrtSetSessionExecutionMode)Marshal.GetDelegateForFunctionPointer(api_.SetSessionExecutionMode, typeof(DOrtSetSessionExecutionMode));
OrtSessionOptionsSetLoadCancellationFlag = (DOrtSessionOptionsSetLoadCancellationFlag)Marshal.GetDelegateForFunctionPointer(api_.SessionOptionsSetLoadCancellationFlag, typeof(DOrtSessionOptionsSetLoadCancellationFlag));
OrtSetOptimizedModelFilePath = (DOrtSetOptimizedModelFilePath)Marshal.GetDelegateForFunctionPointer(api_.SetOptimizedModelFilePath, typeof(DOrtSetOptimizedModelFilePath));
OrtEnableProfiling = (DOrtEnableProfiling)Marshal.GetDelegateForFunctionPointer(api_.EnableProfiling, typeof(DOrtEnableProfiling));
OrtDisableProfiling = (DOrtDisableProfiling)Marshal.GetDelegateForFunctionPointer(api_.DisableProfiling, typeof(DOrtDisableProfiling));
Expand Down Expand Up @@ -1025,6 +1037,12 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
ExecutionMode execution_mode);
public static DOrtSetSessionExecutionMode OrtSetSessionExecutionMode;

[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionOptionsSetLoadCancellationFlag(IntPtr /*(OrtSessionOptions*)*/ options,
bool value);
public static DOrtSessionOptionsSetLoadCancellationFlag OrtSessionOptionsSetLoadCancellationFlag;


[UnmanagedFunctionPointer(CallingConvention.Winapi)]
public delegate IntPtr /*(OrtStatus*)*/ DOrtSetOptimizedModelFilePath(IntPtr /* OrtSessionOptions* */ options, byte[] optimizedModelFilepath);
public static DOrtSetOptimizedModelFilePath OrtSetOptimizedModelFilePath;
Expand Down
10 changes: 10 additions & 0 deletions csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.shared.cs
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,16 @@ public ExecutionMode ExecutionMode
}
private ExecutionMode _executionMode = ExecutionMode.ORT_SEQUENTIAL;

/// <summary>
/// Sets the load cancellation flag for the session. Default is set to false.
/// Provides an opportunity for the user to cancel model loading.
/// </summary>
/// <param name="value">true to request cancellation, false to proceed</param>
public void SetLoadCancellationFlag(bool value)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionOptionsSetLoadCancellationFlag(handle, value));
}

#endregion

#region Private Methods
Expand Down
5 changes: 3 additions & 2 deletions docs/ContribOperators.md
Original file line number Diff line number Diff line change
Expand Up @@ -2039,10 +2039,11 @@ This version of the operator has been available since version 1 of the 'com.micr
1. Input `data` is a constant. It is quantized block-wise along attribute `quantize_axis` with block size specified by attribute `block_size`.
`block_size` must be a power of 2 and not smaller than 16, e.g. 16, 32, 64, 128, ...
2. Input `data`'s scale and zero point are specified by input `scales` and `zero_points`. `scales` and `zero_points` are also constants.
If `zero_points` is not provided, 0 is the zero point.
If `zero_points` is not provided, the default zero point is 0, except when `data` is of uint8 type, in which case the default zero point is 8.
3. During the op execution, `data` and `indices` are first used to generate the quantized output. Then, `scales` and `zero_points` are used
to dequantize the output.
4. The `output` and `scales` have the same type. The `data` and `zero_points` have the same type.
5. For uint8 data, the `gather_axis` must be 0.

#### Version

Expand Down Expand Up @@ -2082,7 +2083,7 @@ This version of the operator has been available since version 1 of the 'com.micr
#### Type Constraints

<dl>
<dt><tt>T1</tt> : tensor(int4), tensor(uint4)</dt>
<dt><tt>T1</tt> : tensor(int4), tensor(uint4), tensor(uint8)</dt>
<dd>Constrain quantized types.</dd>
<dt><tt>T2</tt> : tensor(float), tensor(float16), tensor(bfloat16)</dt>
<dd>Constrain dequantized types.</dd>
Expand Down
2 changes: 1 addition & 1 deletion docs/OperatorKernels.md
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ Do not modify directly.*
|FusedConv|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *in* Z:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|FusedGemm|*in* A:**T**<br> *in* B:**T**<br> *in* C:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|FusedMatMul|*in* A:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|GatherBlockQuantized|*in* data:**T1**<br> *in* indices:**Tind**<br> *in* scales:**T2**<br> *in* zero_points:**T1**<br> *out* output:**T2**|1+|**T1** = tensor(int4), tensor(uint4)<br/> **T2** = tensor(float), tensor(float16)<br/> **Tind** = tensor(int32), tensor(int64)|
|GatherBlockQuantized|*in* data:**T1**<br> *in* indices:**Tind**<br> *in* scales:**T2**<br> *in* zero_points:**T1**<br> *out* output:**T2**|1+|**T1** = tensor(int4), tensor(uint4), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)<br/> **Tind** = tensor(int32), tensor(int64)|
|GatherND|*in* data:**T**<br> *in* indices:**Tind**<br> *out* output:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)<br/> **Tind** = tensor(int32), tensor(int64)|
|Gelu|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|GreedySearch|*in* input_ids:**I**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**I**<br> *in* prefix_vocab_mask:**I**<br> *in* attention_mask:**I**<br> *out* sequences:**I**|1+|**T** = tensor(float)|
Expand Down
32 changes: 31 additions & 1 deletion include/onnxruntime/core/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,26 @@ void LogRuntimeError(uint32_t session_id, const common::Status& status, const ch
abort(); \
} while (false)

#define ORT_THROW_FROM_STATUS(status) \
do { \
::onnxruntime::PrintFinalMessage( \
::onnxruntime::OnnxRuntimeException( \
ORT_WHERE_WITH_STACK, status.ToString()) \
.what()); \
abort(); \
} while (false)

#define ORT_THROW_WITH_CATEGORY_AND_CODE(category, code, ...) \
do { \
::onnxruntime::PrintFinalMessage( \
::onnxruntime::OnnxRuntimeException(ORT_WHERE_WITH_STACK, \
::onnxruntime::MakeString(__VA_ARGS__), \
::onnxruntime::common::category, \
::onnxruntime::common::code) \
.what()); \
abort(); \
} while (false)

#else

#define ORT_TRY try
Expand Down Expand Up @@ -180,6 +200,16 @@ void LogRuntimeError(uint32_t session_id, const common::Status& status, const ch
#define ORT_THROW_EX(ex, ...) \
throw ex(__VA_ARGS__)

#define ORT_THROW_FROM_STATUS(status) \
throw ::onnxruntime::OnnxRuntimeException(ORT_WHERE_WITH_STACK, status.ToString(), status.Category(), \
static_cast<::onnxruntime::common::StatusCode>(status.Code()))

#define ORT_THROW_WITH_CATEGORY_AND_CODE(category, code, ...) \
throw ::onnxruntime::OnnxRuntimeException(ORT_WHERE_WITH_STACK, \
::onnxruntime::MakeString(__VA_ARGS__), \
::onnxruntime::common::category, \
::onnxruntime::common::code)

#endif

#define ORT_MAKE_STATUS(category, code, ...) \
Expand Down Expand Up @@ -237,7 +267,7 @@ void LogRuntimeError(uint32_t session_id, const common::Status& status, const ch
auto _status = (expr); \
if ((!_status.IsOK())) { \
::onnxruntime::LogRuntimeError(0, _status, __FILE__, static_cast<const char*>(__FUNCTION__), __LINE__); \
ORT_THROW(_status); \
ORT_THROW_FROM_STATUS(_status); \
} \
} while (0)

Expand Down
47 changes: 45 additions & 2 deletions include/onnxruntime/core/common/exceptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <vector>

#include "core/common/common.h"
#include "core/common/status.h"
#include "core/common/code_location.h"

namespace onnxruntime {
Expand All @@ -35,12 +36,44 @@ class OnnxRuntimeException : public std::exception {
/**
Create a new exception that captures the location it was thrown from.
@param location Location in the source code the exception is being thrown from
@param msg Message containing additional information about the exception cause.
@param category Error category
@param code Error code
*/

OnnxRuntimeException(const CodeLocation& location,
const std::string& message,
common::StatusCategory category,
common::StatusCode code) noexcept
: OnnxRuntimeException(location, nullptr, message, category, code) {
}

/**
Create a new exception that captures the location it was thrown from.
The instance will be created with ONNXRUNTIME category and FAIL code.
@param location Location in the source code the exception is being thrown from
@param failed_condition Optional string containing the condition that failed.
e.g. "tensor.Size() == input.Size()". May be nullptr.
@param msg Message containing additional information about the exception cause.
*/
OnnxRuntimeException(const CodeLocation& location, const char* failed_condition, const std::string& msg)
: location_{location} {
OnnxRuntimeException(const CodeLocation& location, const char* failed_condition, const std::string& msg) noexcept
: OnnxRuntimeException(location, failed_condition, msg,
common::StatusCategory::ONNXRUNTIME, common::StatusCode::FAIL) {
}

/**
Create a new exception that captures the location it was thrown from.
@param location Location in the source code the exception is being thrown from
@param failed_condition Optional string containing the condition that failed.
e.g. "tensor.Size() == input.Size()". May be nullptr.
@param msg Message containing additional information about the exception cause.
@param category Error category
@param code Error code
*/
OnnxRuntimeException(const CodeLocation& location, const char* failed_condition, const std::string& msg,
common::StatusCategory category,
common::StatusCode code)
: location_{location}, category_(category), code_(code) {
std::ostringstream ss;

ss << location.ToString(CodeLocation::kFilenameAndPath); // output full path in case just the filename is ambiguous
Expand All @@ -58,6 +91,14 @@ class OnnxRuntimeException : public std::exception {
what_ = ss.str();
}

common::StatusCategory Category() const noexcept {
return category_;
}

common::StatusCode Code() const noexcept {
return code_;
}

const char* what() const noexcept override {
return what_.c_str();
}
Expand All @@ -66,6 +107,8 @@ class OnnxRuntimeException : public std::exception {
const CodeLocation location_;
const std::vector<std::string> stacktrace_;
std::string what_;
common::StatusCategory category_;
common::StatusCode code_;
};

} // namespace onnxruntime
7 changes: 6 additions & 1 deletion include/onnxruntime/core/common/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
MODEL_LOADED = 8,
NOT_IMPLEMENTED = 9,
INVALID_GRAPH = 10,
EP_FAIL = 11
EP_FAIL = 11,
MODEL_LOAD_CANCELED = 12,
};

constexpr const char* StatusCodeToString(StatusCode status) noexcept {
Expand Down Expand Up @@ -72,6 +73,8 @@
return "INVALID_GRAPH";
case StatusCode::EP_FAIL:
return "EP_FAIL";
case StatusCode::MODEL_LOAD_CANCELED:
return "MODEL_LOAD_CANCELED";
default:
return "GENERAL ERROR";
}
Expand Down Expand Up @@ -104,6 +107,8 @@
return HRESULT_FROM_WIN32(ERROR_FILE_CORRUPT);
case StatusCode::EP_FAIL:
return HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR);
case StatusCode::MODEL_LOAD_CANCELED:
return HRESULT_FROM_WIN32(ERROR_CANCELLED);

Check notice on line 111 in include/onnxruntime/core/common/status.h

View workflow job for this annotation

GitHub Actions / misspell

[misspell] include/onnxruntime/core/common/status.h#L111

"CANCELLED" is a misspelling of "CANCELED"
Raw output
./include/onnxruntime/core/common/status.h:111:38: "CANCELLED" is a misspelling of "CANCELED"
default:
return E_FAIL;
}
Expand Down
19 changes: 19 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ typedef enum OrtErrorCode {
ORT_NOT_IMPLEMENTED,
ORT_INVALID_GRAPH,
ORT_EP_FAIL,
ORT_MODEL_LOAD_CANCELED,
} OrtErrorCode;

typedef enum OrtOpAttrType {
Expand Down Expand Up @@ -4898,6 +4899,24 @@ struct OrtApi {
_In_ const int64_t* shape, size_t shape_len,
ONNXTensorElementDataType type,
_Outptr_ OrtValue** out);

/** \brief Sets the load cancellation flag to abort the session loading process.
*
* \param[in] options instance that was passed to the session at creation time.
* \param[in] cancel Setting this to true after the model loading process has been initiated will
* attempt to cancel loading. If cancellation is successful, CreateSession(),
* CreateSessionFromArray(), or any other session creation API that takes session options as an
* argument will return an OrtStatus indicating that session loading was canceled at the user's request,
* with error code ORT_MODEL_LOAD_CANCELED.
* The APIs above will not return any valid Session instance. Cancellation is best-effort and the result
* is not guaranteed. The session may have already been created and initialized
* before the cancellation request was issued.
*
* \snippet{doc} snippets.dox OrtStatus
*
*/
ORT_API2_STATUS(SessionOptionsSetLoadCancellationFlag, _Inout_ OrtSessionOptions* options,
_In_ bool cancel);
};

/*
Expand Down
2 changes: 2 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,8 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {

SessionOptionsImpl& SetExecutionMode(ExecutionMode execution_mode); ///< Wraps OrtApi::SetSessionExecutionMode

SessionOptionsImpl& SetLoadCancellationFlag(bool value); ///< Wraps OrtApi::SessionOptionsSetLoadCancellationFlag

SessionOptionsImpl& SetLogId(const char* logid); ///< Wraps OrtApi::SetSessionLogId
SessionOptionsImpl& SetLogSeverityLevel(int level); ///< Wraps OrtApi::SetSessionLogSeverityLevel

Expand Down
6 changes: 6 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,12 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetExecutionMode(ExecutionM
return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetLoadCancellationFlag(bool value) {
ThrowOnError(GetApi().SessionOptionsSetLoadCancellationFlag(this->p_, value));
return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetLogId(const char* logid) {
ThrowOnError(GetApi().SetSessionLogId(this->p_, logid));
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
},
"devDependencies": {
"@vitejs/plugin-vue": "^5.2.1",
"vite": "^6.2.4"
"vite": "^6.2.5"
}
}
Loading
Loading