From 26250ae74d2c9a3c6860625ba4a147ddfb936907 Mon Sep 17 00:00:00 2001 From: Prathik Rao Date: Wed, 14 Aug 2024 13:45:35 -0700 Subject: [PATCH] ORT 1.19.0 Release: Cherry-Pick Round 2 (#21726) ### Description PRs marked for cherry-pick & bug fixes. ### Motivation and Context ORT 1.19.0 Release Preparation --------- Signed-off-by: Liqun Fu Co-authored-by: George Wu Co-authored-by: liqun Fu Co-authored-by: Scott McKay Co-authored-by: Yi Zhang --- js/react_native/android/build.gradle | 3 +- js/react_native/e2e/android/build.gradle | 6 +- .../cpu/quantization/matmul_nbits.cc | 6 +- .../azure-pipelines/bigmodels-ci-pipeline.yml | 1 + .../orttraining-linux-gpu-ci-pipeline.yml | 2 +- ...orttraining-py-packaging-pipeline-cuda.yml | 2 +- ...ttraining-py-packaging-pipeline-cuda12.yml | 2 +- .../templates/py-packaging-stage.yml | 2 +- .../templates/py-win-arm64ec-qnn.yml | 165 ++++++++++++++++++ .../ortmodule/stage2/requirements.txt | 2 +- 10 files changed, 177 insertions(+), 14 deletions(-) create mode 100644 tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml diff --git a/js/react_native/android/build.gradle b/js/react_native/android/build.gradle index e52bec0b57cde..825990eba0fb8 100644 --- a/js/react_native/android/build.gradle +++ b/js/react_native/android/build.gradle @@ -3,7 +3,7 @@ import java.nio.file.Paths buildscript { repositories { google() - jcenter() + mavenCentral() } dependencies { @@ -145,7 +145,6 @@ android { repositories { mavenCentral() - jcenter() google() def found = false diff --git a/js/react_native/e2e/android/build.gradle b/js/react_native/e2e/android/build.gradle index 08e1f9c017584..5932dfc5695d6 100644 --- a/js/react_native/e2e/android/build.gradle +++ b/js/react_native/e2e/android/build.gradle @@ -10,7 +10,7 @@ buildscript { } repositories { google() - jcenter() + mavenCentral() } dependencies { classpath('com.android.tools.build:gradle:7.1.1') @@ -31,13 +31,13 @@ allprojects { // Android JSC is installed from npm url("$rootDir/../node_modules/jsc-android/dist") } - maven { + maven { // Add Detox as a precompiled native dependency url("$rootDir/../node_modules/detox/Detox-android") } google() - jcenter() + mavenCentral() maven { url 'https://www.jitpack.io' } } } diff --git a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc index 5fdd2b017b8a6..bf43aca73ef3a 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc @@ -105,17 +105,15 @@ class MatMulNBits final : public OpKernel { ORT_ENFORCE(nbits_ == 4, "Only 4b quantization is supported for MatMulNBits op, additional bits support is planned."); const Tensor* tensor_zero_point = nullptr; - has_zp_input_ = info.TryGetConstantInput(3, &tensor_zero_point); + has_zp_input_ = info.TryGetConstantInput(InputIndex::zero_points, &tensor_zero_point); #ifdef ORT_NEURAL_SPEED const Tensor* tensor_B = nullptr; const Tensor* tensor_scale = nullptr; - const Tensor* tensor_zero_point = nullptr; bool B_constant = info.TryGetConstantInput(InputIndex::B, &tensor_B); bool scale_constant = info.TryGetConstantInput(InputIndex::scales, &tensor_scale); - bool zero_point_constant = info.TryGetConstantInput(InputIndex::zero_points, &tensor_zero_point); is_asym_ = zero_point_arg != nullptr; all_constant_ = B_constant && scale_constant; - all_constant_ = is_asym_ ? all_constant_ && zero_point_constant : all_constant_; + all_constant_ = is_asym_ ? all_constant_ && has_zp_input_ : all_constant_; #endif } diff --git a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml index 4a3532dd57fa3..20b77ca7e3e7d 100644 --- a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml @@ -282,6 +282,7 @@ stages: - stage: Llama2_7B_ONNX dependsOn: - Build_Onnxruntime_Cuda + condition: and (succeeded(), or(eq(variables['Build.SourceBranch'], 'refs/heads/main'), startsWith(variables['Build.SourceBranch'], 'refs/heads/rel-'))) jobs: - job: Llama2_7B_ONNX timeoutInMinutes: 120 diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml index 2d2719fef8f3d..5c5b781301db2 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml @@ -48,7 +48,7 @@ jobs: RunInjectedPipeline: 'true' InjectedPipeline: 'orttraining-linux-gpu-test-ci-pipeline.yml' DockerImageTag: 'onnxruntime_orttraining_ortmodule_tests_image' - TimeoutInMinutes: 140 + TimeoutInMinutes: 150 # Enable unreleased onnx opsets in CI builds # This facilitates testing the implementation for the new opsets AllowReleasedOpsetOnly: '0' diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml index be3f67ba450b4..6a772ebc1e1db 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml @@ -18,7 +18,7 @@ stages: torch_version: '2.0.0' opset_version: '17' cuda_version: '11.8' - cmake_cuda_architectures: 60;61;70;75;80;86 + cmake_cuda_architectures: 70;75;80;86 docker_file: Dockerfile.manylinux2_28_training_cuda11_8 agent_pool: Onnxruntime-Linux-GPU upload_wheel: 'yes' diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml index 265db420b1af7..78f115a8972a9 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda12.yml @@ -8,7 +8,7 @@ stages: torch_version: '2.1.0' opset_version: '17' cuda_version: '12.2' - cmake_cuda_architectures: 70;75;80;86;90 + cmake_cuda_architectures: 80;86;90 docker_file: Dockerfile.manylinux2_28_training_cuda12_2 agent_pool: Onnxruntime-Linux-GPU upload_wheel: 'yes' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml index faf453140052b..c90827fa21238 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml @@ -516,7 +516,7 @@ stages: - stage: Python_Packaging_Windows_x64_QNN dependsOn: [] jobs: - - template: py-win-x64-qnn.yml + - template: py-win-arm64ec-qnn.yml parameters: MACHINE_POOL: 'Onnxruntime-QNNEP-Windows-2022-CPU' QNN_SDK: ${{ parameters.qnn_sdk_version }} diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml new file mode 100644 index 0000000000000..775244943484c --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-arm64ec-qnn.yml @@ -0,0 +1,165 @@ +parameters: + +- name: MACHINE_POOL + type: string + default: 'Onnxruntime-QNNEP-Windows-2022-CPU' + +- name: QNN_SDK + displayName: QNN SDK Version + type: string + default: 2.24.0.240626 + +- name: ENV_SETUP_SCRIPT + type: string + default: '' + +- name: BUILD_PY_PARAMETERS + displayName: > + Extra parameters to pass to build.py. Don't put newlines in here. + type: string + default: '' + +jobs: +- job: Win_py_x64_qnn_Wheels + timeoutInMinutes: 210 + workspace: + clean: all + pool: + name: ${{ parameters.MACHINE_POOL }} + strategy: + matrix: + Python38_x64: + PythonVersion: '3.8' + Python39_x64: + PythonVersion: '3.9' + Python310_x64: + PythonVersion: '3.10' + Python311_x64: + PythonVersion: '3.11' + Python312_x64: + PythonVersion: '3.12' + variables: + GRADLE_OPTS: '-Dorg.gradle.daemon=false' + VSGenerator: 'Visual Studio 17 2022' + steps: + - checkout: self + clean: true + submodules: recursive + + - template: telemetry-steps.yml + + - task: UsePythonVersion@0 + inputs: + versionSpec: $(PythonVersion) + addToPath: true + architecture: 'x64' + + - task: onebranch.pipeline.tsaoptions@1 + displayName: 'OneBranch TSAOptions' + inputs: + tsaConfigFilePath: '$(Build.SourcesDirectory)\.config\tsaoptions.json' + appendSourceBranchName: false + + - template: download-deps.yml + + - task: PythonScript@0 + displayName: 'Update deps.txt' + inputs: + scriptPath: $(Build.SourcesDirectory)/tools/ci_build/replace_urls_in_deps.py + arguments: --new_dir $(Build.BinariesDirectory)/deps + workingDirectory: $(Build.BinariesDirectory) + + - task: PowerShell@2 + displayName: 'Install ONNX' + inputs: + filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1' + workingDirectory: '$(Build.BinariesDirectory)' + arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\RelWithDebInfo\installed -build_config RelWithDebInfo + + - template: set-nightly-build-option-variable-step.yml + + - template: jobs/download_win_qnn_sdk.yml + parameters: + QnnSDKVersion: ${{ parameters.QNN_SDK }} + + - task: PythonScript@0 + displayName: 'Generate cmake config' + inputs: + scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' + arguments: > + --config RelWithDebInfo + --build_dir $(Build.BinariesDirectory) + --skip_submodule_sync + --cmake_generator "$(VSGenerator)" + --use_qnn + --qnn_home $(QnnSDKRootDir) + --enable_pybind + --parallel --update --arm64ec + $(TelemetryOption) ${{ parameters.BUILD_PY_PARAMETERS }} + workingDirectory: '$(Build.BinariesDirectory)' + + - task: VSBuild@1 + displayName: 'Build' + inputs: + solution: '$(Build.BinariesDirectory)\RelWithDebInfo\onnxruntime.sln' + platform: 'arm64ec' + configuration: RelWithDebInfo + msbuildArchitecture: 'x64' + maximumCpuCount: true + logProjectEvents: true + workingFolder: '$(Build.BinariesDirectory)\RelWithDebInfo' + createLogFile: true + + # Esrp signing + - template: win-esrp-dll.yml + parameters: + FolderPath: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\onnxruntime\capi' + DisplayName: 'ESRP - Sign Native dlls' + DoEsrp: true + Pattern: '*.pyd' + + - task: PythonScript@0 + displayName: 'Build wheel' + inputs: + scriptPath: '$(Build.SourcesDirectory)\setup.py' + arguments: 'bdist_wheel $(NightlyBuildOption) --wheel_name_suffix=qnn' + workingDirectory: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' + + - task: CopyFiles@2 + displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\dist' + Contents: '*.whl' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: ONNXRuntime python wheel' + inputs: + ArtifactName: onnxruntime_qnn + + - script: | + 7z x *.whl + workingDirectory: '$(Build.ArtifactStagingDirectory)' + displayName: 'unzip the package' + + - task: CredScan@3 + displayName: 'Run CredScan' + inputs: + debugMode: false + continueOnError: true + + - task: BinSkim@4 + displayName: 'Run BinSkim' + inputs: + AnalyzeTargetGlob: '+:file|$(Build.ArtifactStagingDirectory)\**\*.dll' + + - task: TSAUpload@2 + displayName: 'TSA upload' + condition: and (succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/main')) + inputs: + GdnPublishTsaOnboard: false + GdnPublishTsaConfigFile: '$(Build.sourcesDirectory)\.gdn\.gdntsa' + + - template: component-governance-component-detection-steps.yml + parameters: + condition: 'succeeded' diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt index d7fab6a1c8a27..3b13a51f18e27 100644 --- a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt @@ -8,7 +8,7 @@ rsa==4.9 tensorboard==2.13.0 h5py wget -pytorch-lightning +pytorch-lightning==2.3.3 deepspeed==0.9.0 fairscale==0.4.6 parameterized>=0.8.1