Skip to content

Commit

Permalink
[QNN EP] QNN SDK 2.28.2 (#22844)
Browse files Browse the repository at this point in the history
### Description
- Updates pipelines to use QNN SDK 2.28.2.241116.
- Re-enable LayerNormalization unit tests that failed with accuracy
errors with the previous QNN SDK (2.28.0).
- Update QNN EP to no longer provide a dummy bias for LayerNorm if the
QNN SDK version is >= 2.28.0.


### Motivation and Context
Use the latest QNN SDK. This version improves inference latency for
certain customer models.
  • Loading branch information
adrianlizarraga authored Nov 19, 2024
1 parent e597eae commit 497b06f
Show file tree
Hide file tree
Showing 21 changed files with 39 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ Status LayerNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[BIAS_IDX], logger, input_names));
}

#if QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 17)
#if QNN_API_VERSION_MAJOR == 2 && QNN_API_VERSION_MINOR >= 17 && QNN_API_VERSION_MINOR <= 20
if (!has_bias_input && IsNpuBackend(qnn_model_wrapper.GetQnnBackendType())) {
// Bias is implicit. QNN SDK 2.24+ (QNN API version 2.17+) has a validation bug for implicit bias inputs,
// so provide an explicit bias of all 0 (quantized int32).
// Bias is implicit. QNN SDK 2.24 to 2.27 (QNN API version 2.17 to 2.20) has a validation bug for
// implicit bias inputs, so provide an explicit bias of all 0 (quantized int32).
TensorInfo x_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[X_IDX], x_input_info));

Expand Down
10 changes: 9 additions & 1 deletion onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "DSP/QnnDspCommon.h"
#include "HTP/QnnHtpCommon.h"
#include "HTP/QnnHtpContext.h"
#include "Saver/QnnSaver.h"
#include <gsl/gsl>
#include "core/framework/endian_utils.h"
#include "core/common/logging/capture.h"
Expand Down Expand Up @@ -1040,7 +1041,14 @@ Status QnnBackendManager::ExtractBackendProfilingInfo() {
const QnnProfile_EventId_t* profile_events{nullptr};
uint32_t num_events{0};
Qnn_ErrorHandle_t result = qnn_interface_.profileGetEvents(profile_backend_handle_, &profile_events, &num_events);
ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to get profile events. Error: ", QnnErrorHandleToString(result));
if (!qnn_saver_path_.empty()) { // Using QNN Saver backend
// QNN SDK 2.28.2 returns QNN_SAVER_ERROR_DUMMY_RETVALUE, but previous QNN versions return QNN_PROFILE_NO_ERROR.
// We accept both values.
ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result && QNN_SAVER_ERROR_DUMMY_RETVALUE != result,
"Failed to get profile events. Error: ", QnnErrorHandleToString(result));
} else {
ORT_RETURN_IF(QNN_PROFILE_NO_ERROR != result, "Failed to get profile events. Error: ", QnnErrorHandleToString(result));
}

if (num_events > 0) {
LOGS(*logger_, VERBOSE) << "profile_events: " << profile_events << " num_events: " << num_events;
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/test/providers/qnn/gather_op_htp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ TEST_F(QnnHTPBackendTests, GatherOp_IndicesDynamicInt32_Axis0) {
}

// Disabled: fails accuracy validation on QNN 2.28.0.241029.
// Also fails on QNN 2.28.2.
// qdq@QNN_EP val: 3.6094117164611816 (err: 1.3094117641448975, err/output_range: 22.19342041015625%)
// qdq@CPU_EP val: 2.2905881404876709 (err: 0.0094118118286132812, err/output_range: 0.15952222049236298%)
// abs(qdq@QNN_EP - qdq@CPU_EP) / output_range = 22.033897399902344%
Expand Down
27 changes: 9 additions & 18 deletions onnxruntime/test/providers/qnn/layer_norm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,15 +188,11 @@ TEST_F(QnnHTPBackendTests, LayerNorm1D_LastAxis_StaticScale_StaticBias_AU8_WU8_B
ExpectedEPNodeAssignment::All);
}

// QNN 2.27 accuracy issue
// Inaccuracy detected for output 'output_0', element 0
// output_range=1.2245157957077026, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -0
// qdq@QNN_EP val: 0.19133351743221283 (err: 0.19133351743221283, err/output_range: 15.625238418579102%)
// qdq@CPU_EP val: 0 (err: 0, err/output_range: 0%)
TEST_F(QnnHTPBackendTests, DISABLED_LayerNorm1D_QNN2_24_ImplicitBias_ValidationBug) {
// QNN 2.24 LayerNorm fails validation (intermittent) if the bias input is not provided. QNN EP will provide an
// explicit bias of all zeros to get around this bug.
TEST_F(QnnHTPBackendTests, LayerNorm1D_QNN2_24_ImplicitBias_ValidationBug) {
// QNN 2.24 to 2.27: LayerNorm fails validation (intermittent) if the bias input is not provided. QNN EP will provide
// an explicit bias of all zeros to get around this bug.
// QNN 2.28.0: Validation bug is fixed, but the test fails with accuracy errors.
// QNN 2.28.2: All fixed.
for (size_t i = 0; i < 15; i++) { // Run it multiple times since this is an intermittent bug.
RunLayerNormQDQTest<uint16_t, uint8_t>(TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(0.0f, 1.0f, 6)),
TestInputDef<float>({3}, true, GetFloatDataInRange(0.0f, 1.0f, 3)),
Expand All @@ -207,14 +203,9 @@ TEST_F(QnnHTPBackendTests, DISABLED_LayerNorm1D_QNN2_24_ImplicitBias_ValidationB
}
}

// Test accuracy of 16-bit QDQ LayerNorm with a static scale input.
// QNN 2.27 accuracy issue
// Inaccuracy detected for output 'output_0', element 0
// output_range=1.224743127822876, tolerance=0.40000000596046448%.
// Expected val (f32@CPU_EP): -0
// qdq@QNN_EP val: 0.19136904180049896 (err: 0.19136904180049896, err/output_range: 15.625238418579102%)
// qdq@CPU_EP val: 0 (err: 0, err/output_range: 0%)
TEST_F(QnnHTPBackendTests, DISABLED_LayerNorm1D_LastAxis_StaticScale_AU16_WU8) {
TEST_F(QnnHTPBackendTests, LayerNorm1D_LastAxis_StaticScale_AU16_WU8) {
// QNN 2.28.0: Test fails with accuracy errors.
// QNN 2.28.2: All fixed.
RunLayerNormQDQTest<uint16_t, uint8_t>(TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(0.0f, 10.0f, 6)),
TestInputDef<float>({3}, true, GetFloatDataInRange(0.0f, 1.0f, 3)), // Static
TestInputDef<float>(),
Expand All @@ -225,7 +216,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_LayerNorm1D_LastAxis_StaticScale_AU16_WU8) {

// Test accuracy of 8-bit QDQ LayerNorm with a dynamic scale input.
//
// TODO(adrianlizarraga): Fails to finalize with QNN SDK 2.22.
// TODO(adrianlizarraga): Fails to finalize with QNN SDK 2.22. Still fails on QNN SDK 2.28.2.
// Verbose logs:
// Starting stage: Graph Transformations and Optimizations
// C:\...\QNN\HTP\HTP\src\hexagon\prepare\graph_prepare.cc:203:ERROR:could not create op: q::flat_to_vtcm
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/qnn/matmul_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ TEST_F(QnnHTPBackendTests, MatMulOp_PerChannel_A16_WeightUInt4) {
}

// Test QDQ per-channel MatMul with int8 act, int4 weights (static)
// QNN 2.27 regression
// QNN 2.27 regression. Also fails on QNN 2.28.2.
// Failed to finalize QNN graph. Error code: 1002
TEST_F(QnnHTPBackendTests, DISABLED_MatMulOp_PerChannel_AS8_WeightInt4) {
std::vector<float> input0_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/test/providers/qnn/simple_op_htp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Tanh) {
}

// Disabled: backendValidateOpConfig fails on QNN 2.28.0.241029.
// Still fails on QNN 2.28.2.
// QnnDsp <E> [4294967295] has incorrect Value -32768, expected equal to 0.
// QnnDsp <V> validateNativeOps node_token_6:qti.aisw:Tanh htp op validator failed 3110
// QnnDsp <V> registered validator failed => 3110
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

jobs:
- job: Build_QNN_EP
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK Version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

resources:
repositories:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

jobs:
- job: Build_QNN_EP
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ parameters:
- name: qnn_sdk_version
type: string
displayName: 'QNN SDK version. Only for QNN packages.'
default: 2.28.0.241029
default: 2.28.2.241116

trigger: none

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK Version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

- name: build_config
displayName: Build Configuration
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ parameters:
- name: qnn_sdk_version
type: string
displayName: 'QNN SDK version. Only for QNN packages.'
default: 2.28.0.241029
default: 2.28.2.241116

stages:
- ${{ if eq(parameters.enable_windows_cpu, true) }}:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
parameters:
- name: QnnSDKVersion
type: string
default: '2.28.0.241029'
default: '2.28.2.241116'

steps:
- script: |
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
parameters:
- name: QnnSDKVersion
type: string
default: '2.28.0.241029'
default: '2.28.2.241116'

steps:
- powershell: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

jobs:
- job: Linux_py_qnn_Wheels_x64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ parameters:
- name: QNN_SDK
displayName: QNN SDK Version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

- name: ENV_SETUP_SCRIPT
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ parameters:
- name: QNN_SDK
displayName: QNN SDK Version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

- name: ENV_SETUP_SCRIPT
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ parameters:
- name: QNN_SDK
displayName: QNN SDK Version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

- name: ENV_SETUP_SCRIPT
type: string
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
parameters:
QnnSdk: '2.28.0.241029'
QnnSdk: '2.28.2.241116'
build_config: 'RelWithDebInfo'
IsReleaseBuild: false
DoEsrp: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

jobs:
- job: 'build'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.28.0.241029
default: 2.28.2.241116

jobs:
- job: 'build'
Expand Down

0 comments on commit 497b06f

Please sign in to comment.