diff --git a/.clang-format b/.clang-format
index da3eac14489b1..efda3f8673147 100644
--- a/.clang-format
+++ b/.clang-format
@@ -7,6 +7,9 @@ BasedOnStyle: Google
ColumnLimit: 0
SortIncludes: false
DerivePointerAlignment: false
+# Avoid adding spaces between tokens in GSL_SUPPRESS arguments.
+# E.g., don't change "GSL_SUPPRESS(r.11)" to "GSL_SUPPRESS(r .11)".
+WhitespaceSensitiveMacros: ["GSL_SUPPRESS"]
# if you want to customize when working locally see https://clang.llvm.org/docs/ClangFormatStyleOptions.html for options.
# See ReformatSource.ps1 for a script to update all source according to the current options in this file.
diff --git a/.gdn/.gdntsa b/.gdn/.gdntsa
index 2992cab431757..e49848d116d90 100644
--- a/.gdn/.gdntsa
+++ b/.gdn/.gdntsa
@@ -1,3 +1,3 @@
{
- "codebaseName": "onnxruntime_master"
+ "codebaseName": "onnxruntime_main"
}
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/04-web.yml b/.github/ISSUE_TEMPLATE/04-web.yml
index 15919a5983637..d84226ff5bfe1 100644
--- a/.github/ISSUE_TEMPLATE/04-web.yml
+++ b/.github/ISSUE_TEMPLATE/04-web.yml
@@ -55,8 +55,10 @@ body:
attributes:
label: Execution Provider
options:
- - WebGL
- - WASM
+ - "'webgl' (WebGL)"
+ - "'wasm'/'cpu' (WebAssembly CPU)"
+ - "'xnnpack' (WebAssembly XNNPACK)"
+ - "'webgpu' (WebGPU)"
- Other / Unknown
multiple: yes
validations:
diff --git a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml
index 707a71c4b20fe..7144363717749 100644
--- a/.github/workflows/cffconvert.yml
+++ b/.github/workflows/cffconvert.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out a copy of the repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
- name: Check whether the citation metadata from CITATION.cff is valid
uses: citation-file-format/cffconvert-github-action@2.0.0
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 2fe66013ebbbc..d3ecf44fe5733 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -33,7 +33,7 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
diff --git a/.github/workflows/gradle-wrapper-validation.yml b/.github/workflows/gradle-wrapper-validation.yml
index 07346b38b2151..03ea773a25130 100644
--- a/.github/workflows/gradle-wrapper-validation.yml
+++ b/.github/workflows/gradle-wrapper-validation.yml
@@ -10,5 +10,5 @@ jobs:
name: "Validation"
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- uses: gradle/wrapper-validation-action@v1
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index d1ba497f856b0..4a4e286071ff5 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -7,7 +7,7 @@ jobs:
triage:
runs-on: ubuntu-latest
steps:
- - uses: github/issue-labeler@v2.5
+ - uses: github/issue-labeler@v3.2
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: .github/labeler.yml
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 83d2a4bfd69c9..432c789e943b5 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -3,8 +3,8 @@ name: Lint
on:
push:
branches:
- - master
- main
+ - rel-*
pull_request:
jobs:
@@ -12,7 +12,7 @@ jobs:
name: Optional Lint
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: misspell # Check spellings as well
uses: reviewdog/action-misspell@v1
with:
@@ -34,7 +34,7 @@ jobs:
name: Python format
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4
with:
@@ -100,7 +100,7 @@ jobs:
name: Lint JavaScript
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- uses: reviewdog/action-eslint@v1
with:
reporter: github-pr-check
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 92ddf900d9c5e..7b314d845d9b4 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -2,18 +2,22 @@ name: Linux_CI
on:
push:
branches:
- - master
- main
+ - rel-*
pull_request:
+concurrency:
+ group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+ cancel-in-progress: true
+
jobs:
Onnxruntime-TVM:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
with:
submodules: true
- - uses: actions/setup-python@v3
+ - uses: actions/setup-python@v4
with:
python-version: '3.8.x'
architecture: 'x64'
diff --git a/.github/workflows/publish-c-apidocs.yml b/.github/workflows/publish-c-apidocs.yml
index c08363ab39369..0a3e9ed2594c1 100644
--- a/.github/workflows/publish-c-apidocs.yml
+++ b/.github/workflows/publish-c-apidocs.yml
@@ -6,8 +6,8 @@ on:
branches:
- main
paths:
- - include/onnxruntime/core/session
- - orttraining/orttraining/training_api/include/
+ - include/onnxruntime/core/session/**
+ - orttraining/orttraining/training_api/include/**
schedule:
- cron: '0 0 1 * *'
workflow_dispatch:
@@ -24,19 +24,19 @@ jobs:
name: Generate C/C++ API docs
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Install doxygen and dependencies
run: |
sudo apt update
sudo apt-get install libclang-dev
sudo apt-get install libclang-cpp14
- wget https://www.doxygen.nl/files/doxygen-1.9.6.linux.bin.tar.gz
- tar xvzf doxygen-1.9.6.linux.bin.tar.gz
+ wget https://www.doxygen.nl/files/doxygen-1.9.8.linux.bin.tar.gz
+ tar xvzf doxygen-1.9.8.linux.bin.tar.gz
- name: Run doxygen
run: |
mkdir -p build/doxygen
cd docs/c_cxx
- ../../doxygen-1.9.6/bin/doxygen
+ ../../doxygen-1.9.8/bin/doxygen
- name: Log source commit
run: git rev-parse --short HEAD > build/doxygen/html/source-version.txt
- name: Move C/C++ docs into site
diff --git a/.github/workflows/publish-csharp-apidocs.yml b/.github/workflows/publish-csharp-apidocs.yml
index 7d33a782fb488..9b9ca924bd008 100644
--- a/.github/workflows/publish-csharp-apidocs.yml
+++ b/.github/workflows/publish-csharp-apidocs.yml
@@ -6,7 +6,7 @@ on:
branches:
- main
paths:
- - csharp
+ - csharp/**
schedule:
- cron: '0 0 1 * *'
workflow_dispatch:
@@ -24,11 +24,11 @@ jobs:
env:
DOCFXVERSION: 2.62.2
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Setup .NET
- uses: actions/setup-dotnet@v2
+ uses: actions/setup-dotnet@v3
with:
- dotnet-version: 5.0.x
+ dotnet-version: 6.0.x
- name: Restore dependencies
run: dotnet restore csharp/ApiDocs/ApiDocs.csproj
- name: Download DocFX
diff --git a/.github/workflows/publish-gh-pages.yml b/.github/workflows/publish-gh-pages.yml
index 5ddb1e3bb03d1..1818261b4b766 100644
--- a/.github/workflows/publish-gh-pages.yml
+++ b/.github/workflows/publish-gh-pages.yml
@@ -1,101 +1,16 @@
-# Sample workflow for building and deploying a Jekyll site to GitHub Pages
+# This is a placeholder workflow only. Its purpose is for manual runs to show up
+# in the GitHub web UI. It is not used for any automated runs.
name: Publish site
on:
- # Runs on pushes targeting the branch where the website sources live
- push:
- branches: ["gh-pages"]
-
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
-# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
-permissions:
- contents: read
- pages: write
- id-token: write
-
-# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
-# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
-concurrency:
- group: "pages"
- cancel-in-progress: false
-
jobs:
- # Build job
- build:
+ placeholder:
runs-on: ubuntu-latest
steps:
- - name: Checkout
- uses: actions/checkout@v3
- with:
- ref: gh-pages
-
- - name: Setup Pages
- uses: actions/configure-pages@v3
-
- - name: Build with Jekyll
- uses: actions/jekyll-build-pages@v1
- with:
- source: ./
- destination: ./_site
-
- - name: Download C apidocs artifact
- uses: dawidd6/action-download-artifact@v2
- with:
- name: onnxruntime-c-apidocs
- workflow: publish-c-apidocs.yml
- branch: main
- path: apidocs
-
- - name: Download C# apidocs artifact
- uses: dawidd6/action-download-artifact@v2
- with:
- name: onnxruntime-csharp-apidocs
- workflow: publish-csharp-apidocs.yml
- branch: main
- path: apidocs
-
- - name: Download Java apidocs artifact
- uses: dawidd6/action-download-artifact@v2
- with:
- name: onnxruntime-java-apidocs
- workflow: publish-java-apidocs.yml
- branch: main
- path: apidocs
-
- - name: Download Python apidocs artifact
- uses: dawidd6/action-download-artifact@v2
- with:
- name: onnxruntime-python-apidocs
- workflow: publish-python-apidocs.yml
- branch: main
- path: apidocs
-
- - name: Move apidocs folder into place
+ - name: Placeholder step to have workflow included in the GitHub web UI
run: |
- sudo rm -rf _site/docs/api/c
- sudo mv apidocs/docs/api/c _site/docs/api
- sudo rm -rf _site/docs/api/csharp
- sudo mv apidocs/docs/api/csharp _site/docs/api
- sudo rm -rf _site/docs/api/java
- sudo mv apidocs/docs/api/java _site/docs/api
- sudo rm -rf _site/docs/api/python
- sudo mv apidocs/docs/api/python _site/docs/api
-
- - name: Upload site
- uses: actions/upload-pages-artifact@v1
- with:
- retention-days: 21
-
- # Deployment job
- deploy:
- environment:
- name: github-pages
- url: ${{ steps.deployment.outputs.page_url }}
- runs-on: ubuntu-latest
- needs: build
- steps:
- - name: Deploy to GitHub Pages
- id: deployment
- uses: actions/deploy-pages@v1
+ echo "Placeholder step to have workflow included in the GitHub web UI"
+ echo "The actual publish workflow is run from the gh-pages branch"
diff --git a/.github/workflows/publish-java-apidocs.yml b/.github/workflows/publish-java-apidocs.yml
index b81ea47c7fc37..9ea9bda7e7c53 100644
--- a/.github/workflows/publish-java-apidocs.yml
+++ b/.github/workflows/publish-java-apidocs.yml
@@ -6,7 +6,7 @@ on:
branches:
- main
paths:
- - java
+ - java/**
schedule:
- cron: '0 0 1 * *'
workflow_dispatch:
@@ -23,7 +23,7 @@ jobs:
name: Generate Java docs
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Set up JDK 11
uses: actions/setup-java@v3
with:
diff --git a/.github/workflows/publish-js-apidocs.yml b/.github/workflows/publish-js-apidocs.yml
new file mode 100644
index 0000000000000..ba8bfd718abfa
--- /dev/null
+++ b/.github/workflows/publish-js-apidocs.yml
@@ -0,0 +1,50 @@
+name: Update JS API Docs
+
+# Run when the JS API changes or every month so that the artifact does not expire
+on:
+ push:
+ branches:
+ - main
+ paths:
+ - js/common/**
+ schedule:
+ - cron: '0 0 1 * *'
+ workflow_dispatch:
+
+concurrency:
+ group: "apidocs-js"
+ cancel-in-progress: false
+
+permissions:
+ contents: write
+
+jobs:
+ build:
+ name: Generate JS API docs
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Setup Node.js
+ uses: actions/setup-node@v3
+ with:
+ node-version: 18
+ - name: Generate JS docs
+ run: |
+ cd js/
+ npm ci
+ cd common/
+ npm ci
+ npx typedoc
+ - name: Log source commit
+ run: git rev-parse --short HEAD > js/common/docs/source-version.txt
+ - name: Move JS docs into site
+ run: |
+ rm -rf _site/docs/api/js
+ mkdir -p _site/docs/api
+ mv js/common/docs _site/docs/api/js
+ - name: Upload docs artifact
+ uses: actions/upload-artifact@v3
+ with:
+ name: onnxruntime-node-apidocs
+ path: _site
+ retention-days: 60
diff --git a/.github/workflows/publish-objectivec-apidocs.yml b/.github/workflows/publish-objectivec-apidocs.yml
index 9ce6e68c5b8bb..1b327eebfa8a8 100644
--- a/.github/workflows/publish-objectivec-apidocs.yml
+++ b/.github/workflows/publish-objectivec-apidocs.yml
@@ -6,7 +6,7 @@ on:
branches:
- main
paths:
- - objectivec
+ - objectivec/**
schedule:
- cron: '0 0 1 * *'
workflow_dispatch:
@@ -21,9 +21,9 @@ permissions:
jobs:
build:
name: Generate Objective-C API docs
- runs-on: macos-12
+ runs-on: macos-13
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Install Jazzy
run: |
diff --git a/.github/workflows/publish-python-apidocs.yml b/.github/workflows/publish-python-apidocs.yml
index 263dbe92299a5..ab9d4781afb83 100644
--- a/.github/workflows/publish-python-apidocs.yml
+++ b/.github/workflows/publish-python-apidocs.yml
@@ -6,7 +6,8 @@ on:
branches:
- main
paths:
- - onnxruntime/python
+ - onnxruntime/python/**
+ - docs/python/**
schedule:
- cron: '0 0 1 * *'
workflow_dispatch:
@@ -23,7 +24,7 @@ jobs:
name: Generate Python API docs
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Install tools
run: |
sudo apt-get update
@@ -34,19 +35,19 @@ jobs:
python3 -m pip install --upgrade pip
cd docs/python
python3 -m pip install -r requirements.txt
- python3 -m pip install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ ort-nightly
+ python3 -m pip install --pre onnxruntime-training -f https://download.onnxruntime.ai/onnxruntime_nightly_cpu.html
python3 -m pip list
- name: Generate Python docs with Sphinx
run: |
cd tools/doc
./builddoc.sh /usr/bin ../.. ../../build
- name: Log source commit
- run: git rev-parse --short HEAD > build/docs/inference/html/source-version.txt
+ run: git rev-parse --short HEAD > build/docs/html/source-version.txt
- name: Move Python docs into site
run: |
rm -rf _site/docs/api/python
- mkdir -p _site/docs/api
- mv build/docs/inference/html _site/docs/api/python
+ mkdir -p _site/docs/api/
+ mv build/docs/html _site/docs/api/python
- name: Upload docs artifact
uses: actions/upload-artifact@v3
with:
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index f2c61d3359df1..ba24e7eebfb03 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -2,18 +2,54 @@ name: Windows_CI
on:
push:
branches:
- - master
- main
+ - rel-*
pull_request:
+concurrency:
+ group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+ cancel-in-progress: true
+
+env:
+ AZCOPY_AUTO_LOGIN_TYPE: MSI
+ AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
+
jobs:
+ Windows-CUDA-12:
+ runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-mms"]
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: false
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.11.x'
+ architecture: 'x64'
+
+ - uses: actions/setup-node@v3
+ with:
+ node-version: 18
+
+ - name: Download cuda
+ run: azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v12.2" cuda_sdk
+
+
+ - name: Delete build folder
+ run: |
+ if (Test-Path D:\b) { Remove-Item -Recurse -Force D:\b }
+ &tools\ci_build\github\windows\install_third_party_deps.ps1 -cpu_arch x64 -install_prefix D:\b\Debug\installed -build_config Debug
+
+ # The build machine doesn't have a GPU. So the value of CMAKE_CUDA_ARCHITECTURES doesn't matter.
+ - name: Build code
+ run: python tools\ci_build\build.py --windows_sdk_version 10.0.22621.0 --enable_training --build_java --config Debug --build_dir D:\b --skip_submodule_sync --build_csharp --update --build --parallel --cmake_generator "Visual Studio 17 2022" --build_shared_lib --enable_pybind --use_cuda --cuda_home=${{ github.workspace }}\cuda_sdk\v12.2 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75
+
Onnxruntime-TVM:
- runs-on: windows-2019
+ runs-on: windows-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
with:
submodules: true
- - uses: actions/setup-python@v3
+ - uses: actions/setup-python@v4
with:
python-version: '3.8.x'
architecture: 'x64'
@@ -32,6 +68,14 @@ jobs:
- name: 'Setup TVM EP Python requirements'
run: |
python3 -m pip install -r ${{ github.workspace }}/tools/ci_build/github/linux/tvm/requirements.txt
+ - name: 'rm gtest in conda'
+ shell: pwsh
+ run: |
+ Remove-Item 'C:\Miniconda\Library\lib\cmake\gtest' -Recurse -Force
+ Remove-Item 'C:\Miniconda\Library\lib\gmock.lib' -Force
+ Remove-Item 'C:\Miniconda\Library\lib\gmock_main.lib' -Force
+ Remove-Item 'C:\Miniconda\Library\lib\gtest.lib' -Force
+ Remove-Item 'C:\Miniconda\Library\lib\gtest_main.lib' -Force
- name: 'Build and Test'
run: |
python3 ${{ github.workspace }}/tools/ci_build/build.py --build_dir build --config Release --skip_submodule_sync --parallel --enable_pybind --disable_contrib_ops --disable_ml_ops --skip_onnx_tests --use_tvm
diff --git a/.gitmodules b/.gitmodules
index 1319cfacc1170..7bb49e98bfec1 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,16 +1,10 @@
[submodule "cmake/external/onnx"]
path = cmake/external/onnx
url = https://github.com/onnx/onnx.git
-[submodule "cmake/external/eigen"]
- path = cmake/external/eigen
- url = https://gitlab.com/libeigen/eigen.git
[submodule "cmake/external/libprotobuf-mutator"]
path = cmake/external/libprotobuf-mutator
url = https://github.com/google/libprotobuf-mutator.git
[submodule "cmake/external/emsdk"]
path = cmake/external/emsdk
url = https://github.com/emscripten-core/emsdk.git
- branch = 3.1.32
-[submodule "cmake/external/onnxruntime-extensions"]
- path = cmake/external/onnxruntime-extensions
- url = https://github.com/microsoft/onnxruntime-extensions.git
+ branch = 3.1.44
diff --git a/.lintrunner.toml b/.lintrunner.toml
index daf8cc4c908aa..86be8d0d0bd38 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -64,36 +64,6 @@ init_command = [
'--dry-run={{DRYRUN}}',
'--requirement=requirements-lintrunner.txt',
]
-
-[[linter]]
-code = 'RUFF-FIX'
-include_patterns = [
- '**/*.py',
- '**/*.pyi',
-]
-exclude_patterns = [
- 'cmake/external/**',
- # ignore generated flatbuffers code
- 'onnxruntime/core/flatbuffers/ort_flatbuffers_py/**',
-]
-command = [
- 'python',
- '-m',
- 'lintrunner_adapters',
- 'run',
- 'ruff_fix_linter',
- '--config=pyproject.toml',
- '@{{PATHSFILE}}'
-]
-init_command = [
- 'python',
- '-m',
- 'lintrunner_adapters',
- 'run',
- 'pip_init',
- '--dry-run={{DRYRUN}}',
- '--requirement=requirements-lintrunner.txt',
-]
is_formatter = true
@@ -184,10 +154,10 @@ exclude_patterns = [
'java/**', # FIXME: Enable clang-format for java
'js/**',
'onnxruntime/contrib_ops/cuda/bert/tensorrt_fused_multihead_attention/**', # Contains data chunks
- 'onnxruntime/core/flatbuffers/schema/ort.fbs.h', # Generated code
+ 'onnxruntime/core/flatbuffers/schema/*.fbs.h', # Generated code
'onnxruntime/core/graph/contrib_ops/quantization_defs.cc',
'onnxruntime/core/mlas/**', # Contains assembly code
- 'winml/**', # FIXME: Enable clang-format for winml
+ 'winml/lib/Api.Image/shaders/**', # Contains data chunks
]
command = [
'python',
diff --git a/.pipelines/OneBranch.Nuget-WindowsAI-Pipeline.Official.yml b/.pipelines/OneBranch.Nuget-WindowsAI-Pipeline.Official.yml
index fa2b475fe9c1c..b9de1b79e1d51 100644
--- a/.pipelines/OneBranch.Nuget-WindowsAI-Pipeline.Official.yml
+++ b/.pipelines/OneBranch.Nuget-WindowsAI-Pipeline.Official.yml
@@ -351,6 +351,31 @@ extends:
- script: |
dir $(Build.SourcesDirectory)\unzipped\runtimes\win-x64\_native
+ - task: EsrpCodeSigning@2
+ displayName: "Sign Nuget package"
+ inputs:
+ ConnectedServiceName: 'OnnxRuntime CodeSign 20190817'
+ FolderPath: $(Build.ArtifactStagingDirectory)
+ Pattern: '*.nupkg'
+ signConfigType: inlineSignParams
+ inlineOperation: |
+ [
+ {
+ "keyCode": "CP-401405",
+ "operationSetCode": "NuGetSign",
+ "parameters": [ ],
+ "toolName": "sign",
+ "toolVersion": "1.0"
+ },
+ {
+ "keyCode": "CP-401405",
+ "operationSetCode": "NuGetVerify",
+ "parameters": [ ],
+ "toolName": "sign",
+ "toolVersion": "1.0"
+ }
+ ]
+
- job: NuGet_Publishing
pool:
type: windows
diff --git a/.pipelines/nuget_config/x64/packages.config b/.pipelines/nuget_config/x64/packages.config
index 2ec9b577c1379..2ac650b0e6dc9 100644
--- a/.pipelines/nuget_config/x64/packages.config
+++ b/.pipelines/nuget_config/x64/packages.config
@@ -1,6 +1,6 @@
-
+
diff --git a/.pipelines/nuget_config/x86/packages.config b/.pipelines/nuget_config/x86/packages.config
index 6f9e3ef4960b9..f80f96194a230 100644
--- a/.pipelines/nuget_config/x86/packages.config
+++ b/.pipelines/nuget_config/x86/packages.config
@@ -1,6 +1,6 @@
-
+
diff --git a/.pipelines/windowsai-steps.yml b/.pipelines/windowsai-steps.yml
index 0b736da427261..45ebf889c5da1 100644
--- a/.pipelines/windowsai-steps.yml
+++ b/.pipelines/windowsai-steps.yml
@@ -80,11 +80,11 @@ jobs:
# must call vsdevcmd first to add cmake to PATH
- script: |
- curl -O -L https://github.com/Kitware/CMake/releases/download/v3.24.3/cmake-3.24.3-windows-x86_64.zip
- 7z x cmake-3.24.3-windows-x86_64.zip
+ curl -O -L https://github.com/Kitware/CMake/releases/download/v3.26.3/cmake-3.26.3-windows-x86_64.zip
+ 7z x cmake-3.26.3-windows-x86_64.zip
set PYTHONHOME=$(Build.BinariesDirectory)\${{ parameters.PythonPackageName }}.3.9.7\tools
set PYTHONPATH=$(Build.BinariesDirectory)\${{ parameters.PythonPackageName }}.3.9.7\tools
- $(Build.BinariesDirectory)\${{ parameters.PythonPackageName }}.3.9.7\tools\python.exe "$(Build.SourcesDirectory)\tools\ci_build\build.py" --build_dir $(Build.BinariesDirectory) --build_shared_lib --enable_onnx_tests --ms_experimental --use_dml --use_winml --cmake_generator "Visual Studio 16 2019" --update --config RelWithDebInfo --enable_lto --use_telemetry --disable_rtti --enable_wcos $(BuildFlags) --cmake_extra_defines CMAKE_SYSTEM_VERSION=10.0.19041.0 --cmake_path $(Build.BinariesDirectory)\cmake-3.24.3-windows-x86_64\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake-3.24.3-windows-x86_64\bin\ctest.exe
+ $(Build.BinariesDirectory)\${{ parameters.PythonPackageName }}.3.9.7\tools\python.exe "$(Build.SourcesDirectory)\tools\ci_build\build.py" --build_dir $(Build.BinariesDirectory) --build_shared_lib --enable_onnx_tests --ms_experimental --use_dml --use_winml --cmake_generator "Visual Studio 17 2022" --update --config RelWithDebInfo --enable_lto --use_telemetry --disable_rtti --enable_wcos $(BuildFlags) --cmake_extra_defines CMAKE_SYSTEM_VERSION=10.0.19041.0 --cmake_path $(Build.BinariesDirectory)\cmake-3.26.3-windows-x86_64\bin\cmake.exe --ctest_path $(Build.BinariesDirectory)\cmake-3.26.3-windows-x86_64\bin\ctest.exe
workingDirectory: '$(Build.BinariesDirectory)'
displayName: 'Generate cmake config'
diff --git a/.vscode/settings.json b/.vscode/settings.json
index fd28e2d7b335c..b7a1292efb2c6 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -40,4 +40,3 @@
"-build/include_subdir",
"-runtime/references"
]
-}
diff --git a/Package.swift b/Package.swift
index 7f8bfe0c3c18d..f8bf33001ea24 100644
--- a/Package.swift
+++ b/Package.swift
@@ -21,7 +21,7 @@ import class Foundation.ProcessInfo
let package = Package(
name: "onnxruntime",
- platforms: [.iOS(.v11)],
+ platforms: [.iOS(.v12)],
products: [
.library(name: "onnxruntime",
type: .static,
@@ -32,7 +32,14 @@ let package = Package(
.target(name: "OnnxRuntimeBindings",
dependencies: ["onnxruntime"],
path: "objectivec",
- exclude: ["test", "docs", "ReadMe.md", "format_objc.sh"],
+ exclude: ["test", "docs", "ReadMe.md", "format_objc.sh",
+ "ort_checkpoint.mm",
+ "ort_checkpoint_internal.h",
+ "ort_training_session_internal.h",
+ "ort_training_session.mm",
+ "include/ort_checkpoint.h",
+ "include/ort_training_session.h",
+ "include/onnxruntime_training.h"],
cxxSettings: [
.define("SPM_BUILD"),
.unsafeFlags(["-std=c++17",
diff --git a/README.md b/README.md
index 68850f4be8ec1..22ef387f5a7cd 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,6 @@
**ONNX Runtime training** can accelerate the model training time on multi-node NVIDIA GPUs for transformer models with a one-line addition for existing PyTorch training scripts. [Learn more →](https://www.onnxruntime.ai/docs/#onnx-runtime-for-training)
-
## Get Started & Resources
* **General Information**: [onnxruntime.ai](https://onnxruntime.ai)
@@ -17,12 +16,12 @@
* [**Upcoming Release Roadmap**](https://github.com/microsoft/onnxruntime/wiki/Upcoming-Release-Roadmap)
-* **Companion sample repositories**:
+* **Companion sample repositories**:
- ONNX Runtime Inferencing: [microsoft/onnxruntime-inference-examples](https://github.com/microsoft/onnxruntime-inference-examples)
- ONNX Runtime Training: [microsoft/onnxruntime-training-examples](https://github.com/microsoft/onnxruntime-training-examples)
+## Built-in Pipeline Status
-## Build Pipeline Status
|System|Inference|Training|
|---|---|---|
|Windows|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Windows%20CPU%20CI%20Pipeline?label=Windows+CPU)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=9)
[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Windows%20GPU%20CI%20Pipeline?label=Windows+GPU)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=10)
[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/Windows%20GPU%20TensorRT%20CI%20Pipeline?label=Windows+GPU+TensorRT)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=47)||
@@ -33,6 +32,11 @@
|Web|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/ONNX%20Runtime%20Web%20CI%20Pipeline?label=Web)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=161)||
|Other|[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/onnxruntime-binary-size-checks-ci-pipeline?repoName=microsoft%2Fonnxruntime&label=Binary+Size+Check)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=187&repoName=microsoft%2Fonnxruntime)
[![Build Status](https://dev.azure.com/onnxruntime/onnxruntime/_apis/build/status/onnxruntime-python-checks-ci-pipeline?label=Python+Checks)](https://dev.azure.com/onnxruntime/onnxruntime/_build/latest?definitionId=164)||
+## Third-party Pipeline Status
+
+|System|Inference|Training|
+|---|---|---|
+|Linux|[![Build Status](https://github.com/Ascend/onnxruntime/actions/workflows/build-and-test.yaml/badge.svg)](https://github.com/Ascend/onnxruntime/actions/workflows/build-and-test.yaml)||
## Data/Telemetry
diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt
index b4d981d42dfb8..700206180decd 100644
--- a/ThirdPartyNotices.txt
+++ b/ThirdPartyNotices.txt
@@ -5993,3 +5993,309 @@ https://github.com/tensorflow/tfjs
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+
+_____
+
+curl/curl
+
+https://github.com/curl
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (C) Daniel Stenberg, <daniel@haxx.se>, and many
+contributors, see the THANKS file.
+
+All rights reserved.
+
+Permission to use, copy, modify, and distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright
+notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
+NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall not
+be used in advertising or otherwise to promote the sale, use or other dealings
+in this Software without prior written authorization of the copyright holder.
+
+_____
+
+Intel neural-compressor
+
+https://github.com/intel/neural-compressor
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ ============================================================================
+
+ Copyright 2016-2019 Intel Corporation
+ Copyright 2018 YANDEX LLC
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ This distribution includes third party software ("third party programs").
+ This third party software, even if included with the distribution of
+ the Intel software, may be governed by separate license terms, including
+ without limitation, third party license terms, other Intel software license
+ terms, and open source software license terms. These separate license terms
+ govern your use of the third party programs as set forth in the
+ "THIRD-PARTY-PROGRAMS" file.
+
+_____
+
+FlashAttention, https://github.com/Dao-AILab/flash-attention
+
+BSD 3-Clause License
+
+Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+_____
+
+composable_kernel
+
+https://github.com/ROCmSoftwarePlatform/composable_kernel
+
+Copyright (c) 2018- , Advanced Micro Devices, Inc. (Chao Liu, Jing Zhang)
+Copyright (c) 2019- , Advanced Micro Devices, Inc. (Letao Qin, Qianfeng Zhang, Liang Huang, Shaojie Wang)
+Copyright (c) 2022- , Advanced Micro Devices, Inc. (Anthony Chang, Chunyu Lai, Illia Silin, Adam Osewski, Poyen Chen, Jehandad Khan)
+Copyright (c) 2019-2021, Advanced Micro Devices, Inc. (Hanwen Chang)
+Copyright (c) 2019-2020, Advanced Micro Devices, Inc. (Tejash Shah)
+Copyright (c) 2020 , Advanced Micro Devices, Inc. (Xiaoyan Zhou)
+Copyright (c) 2021-2022, Advanced Micro Devices, Inc. (Jianfeng Yan)
+
+SPDX-License-Identifier: MIT
+Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/VERSION_NUMBER b/VERSION_NUMBER
index 141f2e805bebd..092afa15df4df 100644
--- a/VERSION_NUMBER
+++ b/VERSION_NUMBER
@@ -1 +1 @@
-1.15.0
+1.17.0
diff --git a/build.amd64.1411.bat b/build.amd64.1411.bat
deleted file mode 100644
index 5a289c13e0488..0000000000000
--- a/build.amd64.1411.bat
+++ /dev/null
@@ -1,12 +0,0 @@
-:: Copyright (c) Microsoft Corporation. All rights reserved.
-:: Licensed under the MIT License.
-
-rem This will setup the VC env vars to use the 14.11 (VS2017 ver15.3) toolchain which is supported by CUDA 9.2 prior to running build.py.
-rem It currently defaults to amd64 but that could be made configurable if that would be useful to developers running this locally.
-@echo off
-
-rem Use 14.11 toolset
-call "%VCINSTALLDIR%\Auxiliary\Build\vcvarsall.bat" amd64 -vcvars_ver=14.11
-
-rem Requires a python 3.6 or higher install to be available in your PATH
-python %~dp0\tools\ci_build\build.py --build_dir %~dp0\build\Windows %*
\ No newline at end of file
diff --git a/build.bat b/build.bat
index d7f6a8513ebd1..d0c6cbcddd669 100644
--- a/build.bat
+++ b/build.bat
@@ -2,5 +2,9 @@
:: Licensed under the MIT License.
@echo off
-rem Requires a python 3.6 or higher install to be available in your PATH
-python %~dp0\tools\ci_build\build.py --build_dir %~dp0\build\Windows %*
\ No newline at end of file
+
+setlocal
+set PATH=C:\Program Files\Git\usr\bin;%PATH%
+
+rem Requires a Python install to be available in your PATH
+python "%~dp0\tools\ci_build\build.py" --build_dir "%~dp0\build\Windows" %*
diff --git a/build.sh b/build.sh
index 4c28facf7807a..bf799ac8b7211 100755
--- a/build.sh
+++ b/build.sh
@@ -18,5 +18,4 @@ elif [[ "$*" == *"--android"* ]]; then
DIR_OS="Android"
fi
-#requires python3.6 or higher
python3 $DIR/tools/ci_build/build.py --build_dir $DIR/build/$DIR_OS "$@"
diff --git a/cgmanifests/cgmanifest.json b/cgmanifests/cgmanifest.json
index b190fd6e8171e..2a3de3bb0ee51 100644
--- a/cgmanifests/cgmanifest.json
+++ b/cgmanifests/cgmanifest.json
@@ -563,6 +563,15 @@
},
"comments": "python-pillow. Implementation logic for anti-aliasing copied by Resize CPU kernel."
}
+ },
+ {
+ "component": {
+ "type": "git",
+ "git": {
+ "commitHash": "d10b27fe37736d2944630ecd7557cefa95cf87c9",
+ "repositoryUrl": "https://gitlab.com/libeigen/eigen.git"
+ }
+ }
}
],
"Version": 1
diff --git a/cgmanifests/generate_cgmanifest.py b/cgmanifests/generate_cgmanifest.py
index 7d775996835da..a9eaacc6f2938 100644
--- a/cgmanifests/generate_cgmanifest.py
+++ b/cgmanifests/generate_cgmanifest.py
@@ -91,7 +91,7 @@ def add_github_dep(name, parsed_url):
with open(
- os.path.join(REPO_DIR, "tools", "ci_build", "github", "linux", "docker", "Dockerfile.manylinux2014_cuda11"),
+ os.path.join(REPO_DIR, "tools", "ci_build", "github", "linux", "docker", "Dockerfile.manylinux2_28_cuda11"),
) as f:
for line in f:
if not line.strip():
diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json
index 989756361bd07..6f1ca84e1a304 100644
--- a/cgmanifests/generated/cgmanifest.json
+++ b/cgmanifests/generated/cgmanifest.json
@@ -82,17 +82,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "d10b27fe37736d2944630ecd7557cefa95cf87c9",
- "repositoryUrl": "https://gitlab.com/libeigen/eigen.git"
- },
- "comments": "git submodule at cmake/external/eigen"
- }
- },
- {
- "component": {
- "type": "git",
- "git": {
- "commitHash": "0ab19024f08c6673a713e454ef8bd95e174c807f",
+ "commitHash": "a896e3d066448b3530dbcaa48869fafefd738f57",
"repositoryUrl": "https://github.com/emscripten-core/emsdk.git"
},
"comments": "git submodule at cmake/external/emsdk"
@@ -112,7 +102,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "9b7bca2a723ff94edcd007d93b5d0cf1838591dc",
+ "commitHash": "e2525550194ce3d8a2c4a3af451c9d9b3ae6650e",
"repositoryUrl": "https://github.com/onnx/onnx.git"
},
"comments": "git submodule at cmake/external/onnx"
@@ -122,17 +112,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "81e7799c69044c745239202085eb0a98f102937b",
- "repositoryUrl": "https://github.com/microsoft/onnxruntime-extensions.git"
- },
- "comments": "git submodule at cmake/external/onnxruntime-extensions"
- }
- },
- {
- "component": {
- "type": "git",
- "git": {
- "commitHash": "8c0b94e793a66495e0b1f34a5eb26bd7dc672db0",
+ "commitHash": "29bf8085f3bf17b84d30e34b3d7ff8248fda404e",
"repositoryUrl": "https://github.com/abseil/abseil-cpp.git"
},
"comments": "abseil_cpp"
@@ -152,7 +132,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "e7e1482087f58913b80a20b04d5c58d9d6d90155",
+ "commitHash": "6e921e1b1d21e84a5c82416ba7ecd98e33a436d0",
"repositoryUrl": "https://github.com/HowardHinnant/date.git"
},
"comments": "date"
@@ -212,7 +192,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "436617053d0f39a1019a371c3a9aa599b3cb2cea",
+ "commitHash": "13de152c2a1cd73ff4df97bd2c406b6d15d34af3",
"repositoryUrl": "https://github.com/google/nsync.git"
},
"comments": "google_nsync"
@@ -222,7 +202,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "519beb0e52c842729b4b53731d27c0e0c32ab4a2",
+ "commitHash": "f8d7d77c06936315286eb55f8de22cd23c188571",
"repositoryUrl": "https://github.com/google/googletest.git"
},
"comments": "googletest"
@@ -262,7 +242,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "5f4caba4e7a9017816e47becdd918fcc872039ba",
+ "commitHash": "55f373e164d3f092dd6c7a56e3de6f90c4c6f3dc",
"repositoryUrl": "https://github.com/microsoft/wil.git"
},
"comments": "microsoft_wil"
@@ -282,7 +262,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "7bc4e1ae9b36ec8ee635c3629b59ec525bbe82b9",
+ "commitHash": "0a0b5fb001ce0233ae3a6f99d849c0649e5a7361",
"repositoryUrl": "https://github.com/boostorg/mp11.git"
},
"comments": "mp11"
@@ -292,17 +272,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "3b58938e025c41d2fcd89fa22028eefaa81a18ad",
- "repositoryUrl": "https://github.com/onnx/onnx.git"
- },
- "comments": "onnx"
- }
- },
- {
- "component": {
- "type": "git",
- "git": {
- "commitHash": "ba6a4fb34fdeaa3613bf981610c657e7b663a699",
+ "commitHash": "0462dc31ae78f48744b6141ae376df1f96d3f459",
"repositoryUrl": "https://github.com/onnx/onnx-tensorrt.git"
},
"comments": "onnx_tensorrt"
@@ -352,7 +322,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "5916273f79a21551890fd3d56fc5375a78d1598d",
+ "commitHash": "959002f82d7962a473d8bf301845f2af720e0aa4",
"repositoryUrl": "https://github.com/pytorch/cpuinfo.git"
},
"comments": "pytorch_cpuinfo"
@@ -372,7 +342,7 @@
"component": {
"type": "git",
"git": {
- "commitHash": "ff15c6ada150a5018c5ef2172401cb4529eac9c0",
+ "commitHash": "4cafc9196c4da9c817992b20f5253ef967685bf8",
"repositoryUrl": "https://github.com/dcleblanc/SafeInt.git"
},
"comments": "safeint"
@@ -402,50 +372,30 @@
"component": {
"type": "git",
"git": {
- "commitHash": "19cc035b6c6f2283573d29c7ea7f7d675cf750ce",
- "repositoryUrl": "https://github.com/openssl/openssl.git"
+ "commitHash": "72c943dea2b9240cd09efde15191e144bc7c7d38",
+ "repositoryUrl": "https://github.com/protocolbuffers/utf8_range.git"
},
- "comments": "openssl"
+ "comments": "utf8_range"
}
},
{
"component": {
"type": "git",
"git": {
- "commitHash": "f54b0e47a08782a6131cc3d60f94d038fa6e0a51",
- "repositoryUrl": "https://github.com/Tencent/rapidjson.git"
- },
- "comments": "rapidjson"
- }
- },
- {
- "component": {
- "type": "git",
- "git": {
- "commitHash": "da041154c6bac1a4aa98254a7d6819059e8ac0b0",
- "repositoryUrl": "https://github.com/boostorg/boost.git"
- },
- "comments": "boost"
- }
- },
- {
- "component": {
- "type": "git",
- "git": {
- "commitHash": "9a6546658657dbeb23245117b57f4e6cf6cdc3e6",
- "repositoryUrl": "https://github.com/libb64/libb64.git"
+ "commitHash": "94142d8391c9791ec71c38336436319a2d4ac7a0",
+ "repositoryUrl": "https://github.com/microsoft/onnxruntime-extensions.git"
},
- "comments": "b64"
+ "comments": "extensions"
}
},
{
"component": {
"type": "git",
"git": {
- "commitHash": "75a84807a019bf4961faf713df9d748f0fc83b47",
- "repositoryUrl": "https://github.com/triton-inference-server/server.git"
+ "commitHash": "d52ec01652b7d620386251db92455968d8d90bdc",
+ "repositoryUrl": "https://github.com/ROCmSoftwarePlatform/composable_kernel.git"
},
- "comments": "triton"
+ "comments": "composable_kernel"
}
}
]
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index dd4942caa40ce..496ca72bb1b6c 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -2,7 +2,8 @@
# Licensed under the MIT License.
# Minimum CMake required
-cmake_minimum_required(VERSION 3.24)
+cmake_minimum_required(VERSION 3.26)
+
cmake_policy(SET CMP0069 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
@@ -13,17 +14,22 @@ cmake_policy(SET CMP0117 NEW)
cmake_policy(SET CMP0104 OLD)
# Enable Hot Reload for MSVC compilers if supported.
-if (POLICY CMP0141)
- if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.26.0")
- cmake_policy(SET CMP0141 NEW)
- else()
- cmake_policy(SET CMP0141 OLD)
- endif()
-endif()
+cmake_policy(SET CMP0141 NEW)
# Project
project(onnxruntime C CXX ASM)
+# Disable fast-math for Intel oneAPI compiler
+if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM")
+ if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC-like")
+ # Using icx-cl compiler driver with MSVC-like arguments
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:precise")
+ else()
+ # Using icpx compiler driver
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math")
+ endif()
+endif()
+
# Needed for Java
set(CMAKE_C_STANDARD 99)
@@ -48,14 +54,16 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose build type: Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()
-if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 7)
- message(FATAL_ERROR "GCC version must not less than 7")
+if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 8)
+ message(FATAL_ERROR "GCC version must be greater than or equal to 8")
endif()
# Options
option(onnxruntime_RUN_ONNX_TESTS "Enable ONNX Compatibility Testing" OFF)
option(onnxruntime_GENERATE_TEST_REPORTS "Enable test report generation" OFF)
option(onnxruntime_ENABLE_STATIC_ANALYSIS "Enable static analysis" OFF)
+option(onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES "Use a custom SDL Rule. It is mainly for our CI build" OFF)
+option(onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE "Use a custom SDL Rule. It is mainly for our CI build" OFF)
option(onnxruntime_ENABLE_PYTHON "Enable python buildings" OFF)
# Enable it may cause LNK1169 error
option(onnxruntime_ENABLE_MEMLEAK_CHECKER "Experimental: Enable memory leak checker in Windows debug build" OFF)
@@ -68,7 +76,7 @@ option(onnxruntime_USE_QNN "Build with QNN support" OFF)
option(onnxruntime_USE_SNPE "Build with SNPE support" OFF)
option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
-option(onnxruntime_USE_JS "Build with JavaScript implemented kernels support" OFF)
+option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)
@@ -76,7 +84,8 @@ option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to prov
option(onnxruntime_BUILD_BENCHMARKS "Build ONNXRuntime micro-benchmarks" OFF)
option(onnxruntime_USE_LLVM "Build TVM with LLVM" OFF)
-option(onnxruntime_USE_FLASH_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)
+cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "NOT WIN32; onnxruntime_USE_CUDA" OFF)
+option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)
option(onnxruntime_BUILD_FOR_NATIVE_MACHINE "Enable this option for turning on optimization specific to this machine" OFF)
option(onnxruntime_USE_AVX "Use AVX instructions" OFF)
@@ -122,6 +131,7 @@ option(onnxruntime_TVM_CUDA_RUNTIME "Build TVM with CUDA support" OFF)
option(onnxruntime_TVM_USE_LLVM "Build TVM with LLVM. Set customized path to llvm-config.exe here if need" OFF)
option(onnxruntime_TVM_USE_HASH "Build ipp-crypto library for support hash algorithm. It is defined for TVM only")
option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternative math library on ARM, WebAssembly and x86." OFF)
+option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
# Options related to reducing the binary size produced by the build
# XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
@@ -129,6 +139,7 @@ cmake_dependent_option(onnxruntime_DISABLE_CONTRIB_OPS "Disable contrib ops" OFF
option(onnxruntime_DISABLE_ML_OPS "Disable traditional ML ops" OFF)
option(onnxruntime_DISABLE_SPARSE_TENSORS "Disable sparse tensors data types" OFF)
option(onnxruntime_DISABLE_OPTIONAL_TYPE "Disable optional type" OFF)
+option(onnxruntime_DISABLE_FLOAT8_TYPES "Disable float 8 types" OFF)
option(onnxruntime_MINIMAL_BUILD "Exclude as much as possible from the build. Support ORT format models. No support for ONNX format models." OFF)
cmake_dependent_option(onnxruntime_DISABLE_RTTI "Disable RTTI" ON "NOT onnxruntime_ENABLE_PYTHON" OFF)
# For now onnxruntime_DISABLE_EXCEPTIONS will only work with onnxruntime_MINIMAL_BUILD, more changes (ONNX, non-CPU EP, ...) are required to run this standalone
@@ -162,8 +173,7 @@ option(onnxruntime_ENABLE_CPU_FP16_OPS "Build with advanced instruction sets" ON
option(onnxruntime_USE_NCCL "Build with NCCL support" OFF)
option(onnxruntime_USE_MPI "Build with MPI support" OFF)
-# build WebAssembly
-option(onnxruntime_BUILD_WEBASSEMBLY "Enable this option to create WebAssembly byte codes" OFF)
+# WebAssembly options
option(onnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB "Enable this option to create WebAssembly static library" OFF)
option(onnxruntime_ENABLE_WEBASSEMBLY_THREADS "Enable this option to create WebAssembly byte codes with multi-threads support" OFF)
option(onnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING "Enable this option to turn on exception catching" OFF)
@@ -208,9 +218,13 @@ option(onnxruntime_ENABLE_CPUINFO "Enable cpuinfo" ON)
# ATen fallback support
option(onnxruntime_ENABLE_ATEN "Enable ATen fallback" OFF)
+# Triton support
+option(onnxruntime_ENABLE_TRITON "Enable Triton" OFF)
+
# composable kernel is managed automatically, unless user want to explicitly disable it, it should not be manually set
option(onnxruntime_USE_COMPOSABLE_KERNEL "Enable composable kernel for ROCm EP" ON)
option(onnxruntime_USE_ROCBLAS_EXTENSION_API "Enable rocblas tuning for ROCm EP" OFF)
+option(onnxruntime_USE_TRITON_KERNEL "Enable triton compiled kernel" OFF)
option(onnxruntime_BUILD_KERNEL_EXPLORER "Build Kernel Explorer for testing and profiling GPU kernels" OFF)
option(onnxruntime_BUILD_CACHE "onnxruntime build with cache" OFF)
@@ -218,6 +232,7 @@ option(onnxruntime_BUILD_CACHE "onnxruntime build with cache" OFF)
cmake_dependent_option(MSVC_Z7_OVERRIDE "replacing /Zi and /ZI with /Z7 when using MSVC with CCache" ON "onnxruntime_BUILD_CACHE; MSVC" OFF)
option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF)
+option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF)
# ENABLE_TRAINING includes all training functionality
# The following 2 entry points
@@ -232,14 +247,21 @@ option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF)
# Some features are only enabled when onnxruntime_ENABLE_PYTHON is ON as they are only relevant
# when using python env
if (onnxruntime_ENABLE_TRAINING)
- set(onnxruntime_ENABLE_TRAINING_OPS ON)
set(onnxruntime_ENABLE_TRAINING_APIS ON)
- set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP ON)
+ set(onnxruntime_ENABLE_TRAINING_OPS ON)
set(onnxruntime_ENABLE_ATEN ON)
+ set(onnxruntime_ENABLE_TRITON ON)
+ if (NOT APPLE)
+ set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP ON)
+ endif()
endif()
if (onnxruntime_ENABLE_TRAINING_APIS)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
+ if (onnxruntime_ENABLE_PYTHON AND NOT onnxruntime_ENABLE_TRAINING)
+ message(FATAL_ERROR "Standalone On-Device Training build is not supported with Python bindings! "
+ "Please use the --enable_training flag instead of the --enable_training_apis flag.")
+ endif()
endif()
if (onnxruntime_USE_CUDA)
@@ -259,11 +281,14 @@ if (onnxruntime_USE_ROCM)
endif()
if (NOT CMAKE_HIP_ARCHITECTURES)
- set(CMAKE_HIP_ARCHITECTURES "gfx906;gfx908;gfx90a;gfx1030")
+ set(CMAKE_HIP_ARCHITECTURES "gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101")
endif()
file(GLOB rocm_cmake_components ${onnxruntime_ROCM_HOME}/lib/cmake/*)
list(APPEND CMAKE_PREFIX_PATH ${rocm_cmake_components})
+ # Force cmake to accept the configured HIP compiler. Because the configured CMAKE_PREFIX_PATH does not work during
+ # enable_language(HIP), we might need to move configuring of CMAKE_PREFIX_PATH to build.py (in the future).
+ set(CMAKE_HIP_COMPILER_FORCED ON)
enable_language(HIP)
# NOTE: Flags -mllvm -amdgpu-early-inline-all=true are critical for gpu kernel code performance. -mllvm passes the
@@ -277,7 +302,15 @@ if (onnxruntime_USE_ROCM)
add_compile_options("$<$:SHELL:-x hip>")
if (NOT onnxruntime_HIPIFY_PERL)
- set(onnxruntime_HIPIFY_PERL ${onnxruntime_ROCM_HOME}/hip/bin/hipify-perl)
+ find_path(HIPIFY_PERL_PATH
+ NAMES hipify-perl
+ HINTS
+ ${onnxruntime_ROCM_HOME}/bin
+ ${onnxruntime_ROCM_HOME}/hip/bin)
+ if (HIPIFY_PERL_PATH-NOTFOUND)
+ MESSAGE(FATAL_ERROR "hipify-perl not found")
+ endif()
+ set(onnxruntime_HIPIFY_PERL ${HIPIFY_PERL_PATH}/hipify-perl)
endif()
# replicate strategy used by pytorch to get ROCM_VERSION
@@ -336,7 +369,7 @@ function(set_msvc_c_cpp_compiler_warning_level warning_level)
# only match the generator expression added by this function
list(FILTER opts
EXCLUDE REGEX "^\\$<\\$,\\$>:/W[0-4]>$")
- list(APPEND opts "$<$,$>:${warning_flag}>")
+ list(APPEND opts "$<$:${warning_flag}>")
set_property(DIRECTORY PROPERTY COMPILE_OPTIONS "${opts}")
endif()
endfunction()
@@ -395,6 +428,13 @@ if (NOT (UNIX AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING AND
set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OFF)
endif()
+if (NOT (UNIX AND onnxruntime_USE_CUDA AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING AND (NOT onnxruntime_BUILD_SHARED_LIB)))
+ if (onnxruntime_ENABLE_TRITON)
+ message(WARNING "onnxruntime_ENABLE_TRITON is turned OFF because it's designed to support CUDA training on Linux only currently.")
+ endif()
+ set(onnxruntime_ENABLE_TRITON OFF)
+endif()
+
set(onnxruntime_REQUIRE_PYTHON_EMBED_LIB OFF)
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
add_compile_definitions(ENABLE_TRAINING_TORCH_INTEROP)
@@ -405,6 +445,17 @@ if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
endif()
endif()
+if (onnxruntime_ENABLE_TRITON)
+ # Need SetOutputMLValue.
+ set(onnxruntime_ENABLE_ATEN ON)
+ add_compile_definitions(ENABLE_TRITON)
+
+ # Python::Python is required for building unit test executables.
+ if (onnxruntime_BUILD_UNIT_TESTS)
+ set(onnxruntime_REQUIRE_PYTHON_EMBED_LIB ON)
+ endif()
+endif()
+
# General C# properties
if (onnxruntime_BUILD_CSHARP)
check_language(CSharp)
@@ -503,6 +554,8 @@ if (WIN32)
list(APPEND ORT_WARNING_FLAGS "/wd4251")
# issued by thrust nonstandard extension used: nameless struct/union
list(APPEND ORT_WARNING_FLAGS "/wd4201")
+ # structure was padded due to __declspec(align())
+ list(APPEND ORT_WARNING_FLAGS "/wd4324")
# warning C4800: Implicit conversion from 'X' to bool. Possible information loss
if (onnxruntime_USE_OPENVINO)
list(APPEND ORT_WARNING_FLAGS "/wd4800")
@@ -536,30 +589,31 @@ if (WIN32)
endif()
else()
- check_cxx_compiler_flag(-Wunused-but-set-parameter HAS_UNUSED_BUT_SET_PARAMETER)
- check_cxx_compiler_flag(-Wunused-but-set-variable HAS_UNUSED_BUT_SET_VARIABLE)
- check_cxx_compiler_flag(-Wunused-variable HAS_UNUSED_VARIABLE)
+ check_cxx_compiler_flag(-Wambiguous-reversed-operator HAS_AMBIGUOUS_REVERSED_OPERATOR)
+ check_cxx_compiler_flag(-Wbitwise-instead-of-logical HAS_BITWISE_INSTEAD_OF_LOGICAL)
check_cxx_compiler_flag(-Wcast-function-type HAS_CAST_FUNCTION_TYPE)
- check_cxx_compiler_flag(-Wparentheses HAS_PARENTHESES)
- check_cxx_compiler_flag(-Wuseless-cast HAS_USELESS_CAST)
- check_cxx_compiler_flag(-Wnonnull-compare HAS_NONNULL_COMPARE)
- check_cxx_compiler_flag(-Wtautological-pointer-compare HAS_TAUTOLOGICAL_POINTER_COMPARE)
check_cxx_compiler_flag(-Wcatch-value HAS_CATCH_VALUE)
- check_cxx_compiler_flag(-Wmissing-braces HAS_MISSING_BRACES)
- check_cxx_compiler_flag(-Wignored-attributes HAS_IGNORED_ATTRIBUTES)
+ check_cxx_compiler_flag(-Wclass-memaccess HAS_CLASS_MEMACCESS)
+ check_cxx_compiler_flag(-Wdeprecated-anon-enum-enum-conversion HAS_DEPRECATED_ANON_ENUM_ENUM_CONVERSION)
+ check_cxx_compiler_flag(-Wdeprecated-builtins HAS_DEPRECATED_BUILTINS)
check_cxx_compiler_flag(-Wdeprecated-copy HAS_DEPRECATED_COPY)
check_cxx_compiler_flag(-Wdeprecated-declarations HAS_DEPRECATED_DECLARATIONS)
- check_cxx_compiler_flag(-Wclass-memaccess HAS_CLASS_MEMACCESS)
+ check_cxx_compiler_flag(-Wenum-constexpr-conversion HAS_ENUM_CONSTEXPR_CONVERSION)
+ check_cxx_compiler_flag(-Wformat-truncation HAS_FORMAT_TRUNCATION)
+ check_cxx_compiler_flag(-Wignored-attributes HAS_IGNORED_ATTRIBUTES)
check_cxx_compiler_flag(-Wmaybe-uninitialized HAS_MAYBE_UNINITIALIZED)
+ check_cxx_compiler_flag(-Wmissing-braces HAS_MISSING_BRACES)
+ check_cxx_compiler_flag(-Wnonnull-compare HAS_NONNULL_COMPARE)
+ check_cxx_compiler_flag(-Wparentheses HAS_PARENTHESES)
+ check_cxx_compiler_flag(-Wshorten-64-to-32 HAS_SHORTEN_64_TO_32)
check_cxx_compiler_flag(-Wstrict-aliasing HAS_STRICT_ALIASING)
check_nvcc_compiler_flag(-Wstrict-aliasing NVCC_HAS_STRICT_ALIASING)
- check_cxx_compiler_flag(-Wambiguous-reversed-operator HAS_AMBIGUOUS_REVERSED_OPERATOR)
- check_cxx_compiler_flag(-Wdeprecated-anon-enum-enum-conversion HAS_DEPRECATED_ANON_ENUM_ENUM_CONVERSION)
+ check_cxx_compiler_flag(-Wtautological-pointer-compare HAS_TAUTOLOGICAL_POINTER_COMPARE)
check_cxx_compiler_flag(-Wundefined-var-template HAS_UNDEFINED_VAR_TEMPLATE)
- check_cxx_compiler_flag(-Wformat-truncation HAS_FORMAT_TRUNCATION)
- check_cxx_compiler_flag(-Wbitwise-instead-of-logical HAS_BITWISE_INSTEAD_OF_LOGICAL)
- check_cxx_compiler_flag(-Wenum-constexpr-conversion HAS_ENUM_CONSTEXPR_CONVERSION)
- check_cxx_compiler_flag(-Wdeprecated-builtins HAS_DEPRECATED_BUILTINS)
+ check_cxx_compiler_flag(-Wunused-but-set-parameter HAS_UNUSED_BUT_SET_PARAMETER)
+ check_cxx_compiler_flag(-Wunused-but-set-variable HAS_UNUSED_BUT_SET_VARIABLE)
+ check_cxx_compiler_flag(-Wunused-variable HAS_UNUSED_VARIABLE)
+ check_cxx_compiler_flag(-Wuseless-cast HAS_USELESS_CAST)
check_function_exists(reallocarray HAS_REALLOCARRAY)
if (HAS_TAUTOLOGICAL_POINTER_COMPARE)
@@ -593,6 +647,11 @@ else()
target_compile_options(libprotobuf-lite PRIVATE "-Wno-enum-constexpr-conversion")
endif()
endif()
+
+ # enable warning(s) that may not be on by default
+ if (HAS_SHORTEN_64_TO_32)
+ list(APPEND ORT_WARNING_FLAGS -Wshorten-64-to-32)
+ endif()
endif()
#names in this var must match the directory names under onnxruntime/core/providers
@@ -610,13 +669,16 @@ if (onnxruntime_USE_CUDA)
if (onnxruntime_DISABLE_CONTRIB_OPS)
set(onnxruntime_USE_FLASH_ATTENTION OFF)
+ set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6)
message( STATUS "Turn off flash attention since CUDA compiler version < 11.6")
set(onnxruntime_USE_FLASH_ATTENTION OFF)
+ set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()
else()
set(onnxruntime_USE_FLASH_ATTENTION OFF)
+ set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()
if (onnxruntime_USE_CUDA)
@@ -629,6 +691,11 @@ if (onnxruntime_USE_CUDA)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_FLASH_ATTENTION=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_FLASH_ATTENTION=1)
endif()
+ if (onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
+ message( STATUS "Enable memory efficient attention for CUDA EP")
+ list(APPEND ORT_PROVIDER_FLAGS -DUSE_MEMORY_EFFICIENT_ATTENTION=1)
+ list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_MEMORY_EFFICIENT_ATTENTION=1)
+ endif()
endif()
if (onnxruntime_USE_VITISAI)
@@ -662,9 +729,9 @@ if (onnxruntime_USE_NNAPI_BUILTIN)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_NNAPI_BUILTIN=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES nnapi)
endif()
-if (onnxruntime_USE_JS)
- list(APPEND ORT_PROVIDER_FLAGS -DUSE_JS=1)
- list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JS=1)
+if (onnxruntime_USE_JSEP)
+ list(APPEND ORT_PROVIDER_FLAGS -DUSE_JSEP=1)
+ list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES js)
endif()
if (onnxruntime_USE_QNN)
@@ -722,6 +789,11 @@ if (onnxruntime_USE_XNNPACK)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_XNNPACK=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES xnnpack)
endif()
+if (onnxruntime_USE_WEBNN)
+ list(APPEND ORT_PROVIDER_FLAGS -DUSE_WEBNN=1)
+ list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBNN=1)
+ list(APPEND ONNXRUNTIME_PROVIDER_NAMES webnn)
+endif()
if (onnxruntime_USE_CANN)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_CANN=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CANN=1)
@@ -732,7 +804,9 @@ if (onnxruntime_USE_AZURE)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_AZURE=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES azure)
endif()
-
+if (onnxruntime_USE_LOCK_FREE_QUEUE)
+ add_compile_definitions(USE_LOCK_FREE_QUEUE)
+endif()
if (onnxruntime_ENABLE_LAZY_TENSOR)
# To support LazyTensor, ORT needs to call Python function from C/C++.
@@ -763,7 +837,7 @@ function(onnxruntime_set_compile_flags target_name)
if(UNIX)
target_compile_definitions(${target_name} PRIVATE PLATFORM_POSIX)
endif()
- target_compile_definitions(${target_name} PUBLIC EIGEN_USE_THREADS)
+ target_compile_definitions(${target_name} PRIVATE EIGEN_USE_THREADS)
if (onnxruntime_DISABLE_CONTRIB_OPS)
target_compile_definitions(${target_name} PRIVATE DISABLE_CONTRIB_OPS)
endif()
@@ -779,18 +853,25 @@ function(onnxruntime_set_compile_flags target_name)
if (onnxruntime_DISABLE_OPTIONAL_TYPE)
target_compile_definitions(${target_name} PRIVATE DISABLE_OPTIONAL_TYPE)
endif()
+
+ if (onnxruntime_DISABLE_FLOAT8_TYPES)
+ target_compile_definitions(${target_name} PRIVATE DISABLE_FLOAT8_TYPES)
+ endif()
+
if (onnxruntime_ENABLE_ATEN)
target_compile_definitions(${target_name} PRIVATE ENABLE_ATEN)
endif()
set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON)
if (onnxruntime_USE_CUDA)
# Suppress a "conversion_function_not_usable" warning in gsl/span
- target_compile_options(${target_name} PRIVATE "$<$:SHELL:--diag-suppress 554>")
+ target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">")
+ target_compile_definitions(${target_name} PRIVATE -DDISABLE_CUSPARSE_DEPRECATED)
endif()
if (MSVC)
foreach(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
- target_compile_options(${target_name} PRIVATE "$<$,$>:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>")
+ target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>")
endforeach()
+
foreach(onnxruntime_external_lib IN LISTS onnxruntime_EXTERNAL_LIBRARIES)
#TODO: the list contains cmake keywords like "debug". We should exclude them.
if(TARGET ${onnxruntime_external_lib})
@@ -800,26 +881,39 @@ function(onnxruntime_set_compile_flags target_name)
if(onnxruntime_external_lib_include_dir MATCHES "^\\$]+)>$")
string(REGEX REPLACE "^\\$]+)>$" "\\1" onnxruntime_external_lib_include_dir_cmake "${onnxruntime_external_lib_include_dir}")
cmake_path(NATIVE_PATH onnxruntime_external_lib_include_dir_cmake NORMALIZE onnxruntime_external_lib_include_dir_native)
- target_compile_options(${target_name} PRIVATE "$<$,$>:/external:I${onnxruntime_external_lib_include_dir_native}>")
+ target_compile_options(${target_name} PRIVATE "$<$:/external:I${onnxruntime_external_lib_include_dir_native}>")
endif()
else()
cmake_path(NATIVE_PATH onnxruntime_external_lib_include_dir NORMALIZE onnxruntime_external_lib_include_dir_native)
- target_compile_options(${target_name} PRIVATE "$<$,$>:/external:I${onnxruntime_external_lib_include_dir_native}>")
+ target_compile_options(${target_name} PRIVATE "$<$:/external:I${onnxruntime_external_lib_include_dir_native}>")
endif()
endforeach()
endif()
endforeach()
- target_compile_definitions(${target_name} PUBLIC -DPLATFORM_WINDOWS -DNOGDI -DNOMINMAX -D_USE_MATH_DEFINES -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS)
+ target_compile_definitions(${target_name} PRIVATE -DPLATFORM_WINDOWS -DNOGDI -DNOMINMAX -D_USE_MATH_DEFINES -D_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS)
if (onnxruntime_ENABLE_MEMLEAK_CHECKER)
- target_compile_definitions(${target_name} PUBLIC -DONNXRUNTIME_ENABLE_MEMLEAK_CHECK)
+ target_compile_definitions(${target_name} PRIVATE -DONNXRUNTIME_ENABLE_MEMLEAK_CHECK)
endif()
- target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$,$>:/utf-8>")
- target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /sdl>" "$<$,$>:/sdl>")
+ target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /utf-8>" "$<$:/utf-8>")
+ target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /sdl>" "$<$:/sdl>")
set_target_properties(${target_name}
PROPERTIES VS_GLOBAL_CAExcludePath "${ORT_BINARY_DIR};${ORT_SOURCE_DIR}")
+    # We do not treat warnings from 3rd-party libraries as errors. In order to do that, we need to add their header file locations to /external:I.
+ target_compile_options(${target_name} PRIVATE "$<$:/experimental:external>" "$<$:SHELL:--compiler-options /experimental:external>")
+ target_compile_options(${target_name} PRIVATE "$<$:/external:W0>" "$<$:SHELL:--compiler-options /external:W0>")
+ target_compile_options(${target_name} PRIVATE "$<$:/external:templates->" "$<$:SHELL:--compiler-options /external:templates->")
+ target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CURRENT_SOURCE_DIR}>" "$<$:SHELL:--compiler-options /external:I${CMAKE_CURRENT_SOURCE_DIR}>")
+ target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CURRENT_BINARY_DIR}>" "$<$:SHELL:--compiler-options /external:I${CMAKE_CURRENT_BINARY_DIR}>")
if (onnxruntime_ENABLE_STATIC_ANALYSIS)
- target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /analyze>" "$<$,$>:/analyze>")
- target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /analyze:external->" "$<$,$>:/analyze:external->")
+ target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /analyze>" "$<$:/analyze>")
+ if (onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE)
+ target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /analyze:autolog:ext.sarif>" "$<$:/analyze:autolog:ext.sarif>")
+ endif()
+ target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /analyze:external->" "$<$:/analyze:external->")
+ target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /wd6385>" )
+ # There are many such warnings from STL:
+ # include\list(148): warning C6011: Dereferencing NULL pointer '_Mycont'. : Lines: 146, 147, 148
+ target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options /wd6011>" )
endif()
else()
# Enable warning
@@ -858,6 +952,7 @@ function(onnxruntime_set_compile_flags target_name)
# float16.h:90:12: error: ‘tmp’ is used uninitialized
list(APPEND ORT_HIP_WARNING_FLAGS -Wno-uninitialized)
+ list(APPEND ORT_HIP_WARNING_FLAGS -Wno-deprecated-copy)
# some #pragma unroll will fail, do not treat them as error
# #warning must not be treated as error
@@ -887,12 +982,11 @@ function(onnxruntime_configure_target target_name)
target_link_directories(${target_name} PRIVATE ${onnxruntime_LINK_DIRS})
onnxruntime_set_compile_flags(${target_name})
onnxruntime_set_source_file_properties(${target_name})
- #Uncomment the following three lines to reproduce static analysis errors locally
- #if(WIN32 AND onnxruntime_ENABLE_STATIC_ANALYSIS)
- # set_target_properties(${target_name} PROPERTIES VS_USER_PROPS ${PROJECT_SOURCE_DIR}/EnableVisualStudioCodeAnalysis.props)
- #endif()
+ if(WIN32 AND onnxruntime_ENABLE_STATIC_ANALYSIS AND onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES)
+ set_target_properties(${target_name} PROPERTIES VS_USER_PROPS ${PROJECT_SOURCE_DIR}/EnableVisualStudioCodeAnalysis.props)
+ endif()
target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${abseil_cpp_SOURCE_DIR})
- if (onnxruntime_ENABLE_TRAINING_APIS)
+ if (onnxruntime_ENABLE_TRAINING_OPS)
target_include_directories(${target_name} PRIVATE ${ORTTRAINING_ROOT})
endif()
if (onnxruntime_ENABLE_LTO)
@@ -936,7 +1030,16 @@ function(onnxruntime_add_shared_library_module target_name)
endif()
onnxruntime_configure_target(${target_name})
- if (MSVC AND onnxruntime_target_platform STREQUAL "x86" AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+ if (MSVC AND onnxruntime_target_platform STREQUAL "x86")
+ target_link_options(${target_name} PRIVATE /SAFESEH)
+ endif()
+endfunction()
+
+function(onnxruntime_add_object_library target_name)
+ add_library(${target_name} OBJECT ${ARGN})
+
+ onnxruntime_configure_target(${target_name})
+ if (MSVC AND onnxruntime_target_platform STREQUAL "x86")
target_link_options(${target_name} PRIVATE /SAFESEH)
endif()
endfunction()
@@ -944,7 +1047,7 @@ endfunction()
function(onnxruntime_add_executable target_name)
add_executable(${target_name} ${ARGN})
onnxruntime_configure_target(${target_name})
- if (MSVC AND onnxruntime_target_platform STREQUAL "x86" AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+ if (MSVC AND onnxruntime_target_platform STREQUAL "x86")
target_link_options(${target_name} PRIVATE /SAFESEH)
endif()
endfunction()
@@ -1139,17 +1242,19 @@ if (onnxruntime_USE_OPENVINO)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2022.3")
set(OPENVINO_VERSION "2022.3")
add_definitions(-DOPENVINO_2022_3=1)
+ elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
+ set(OPENVINO_VERSION "2023.0")
+ add_definitions(-DOPENVINO_2023_0=1)
+ elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
+ set(OPENVINO_VERSION "2023.1")
+ add_definitions(-DOPENVINO_2023_1=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
- set(OPENVINO_VERSION "2022.3")
- add_definitions(-DOPENVINO_2022_3=1)
+ set(OPENVINO_VERSION "2023.1")
+ add_definitions(-DOPENVINO_2023_1=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()
- if (onnxruntime_USE_OPENVINO_MYRIAD)
- add_definitions(-DOPENVINO_CONFIG_MYRIAD=1)
- endif()
-
if (onnxruntime_USE_OPENVINO_GPU_FP32)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
endif()
@@ -1166,17 +1271,12 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
endif()
- if (onnxruntime_USE_OPENVINO_VAD_M)
- add_definitions(-DOPENVINO_CONFIG_VAD_M=1)
+ if (onnxruntime_USE_OPENVINO_VPUX_FP16)
+ add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
endif()
- if (onnxruntime_USE_OPENVINO_VAD_F)
- add_definitions(-DOPENVINO_CONFIG_VAD_F=1)
- endif()
-
- if (onnxruntime_USE_OPENVINO_MYRIAD_NP)
- add_definitions(-DOPENVINO_CONFIG_MYRIAD=1)
- add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
+ if (onnxruntime_USE_OPENVINO_VPUX_U8)
+ add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
endif()
if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
@@ -1199,13 +1299,13 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()
- if (onnxruntime_USE_OPENVINO_VAD_M_NP)
- add_definitions(-DOPENVINO_CONFIG_VAD_M=1)
+ if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
+ add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()
- if (onnxruntime_USE_OPENVINO_VAD_F_NP)
- add_definitions(-DOPENVINO_CONFIG_VAD_F=1)
+ if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
+ add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()
@@ -1231,17 +1331,27 @@ if (onnxruntime_USE_OPENVINO)
endif()
if (onnxruntime_USE_VITISAI)
- if (WIN32)
- message(FATAL_ERROR "Vitis-AI execution provider is not supported on Windows.")
- else()
- include(pyxir)
- list(APPEND onnxruntime_EXTERNAL_LIBRARIES pyxir)
- list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES pyxir)
- endif()
-endif()
-
+ set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_LIST_DIR}")
+endif()
+
+set(ORT_BUILD_INFO "ORT Build Info: ")
+find_package(Git)
+if (Git_FOUND)
+ execute_process(COMMAND ${GIT_EXECUTABLE} log -1 --format=%h
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ OUTPUT_VARIABLE ORT_GIT_COMMIT)
+ string(STRIP "${ORT_GIT_COMMIT}" ORT_GIT_COMMIT)
+ execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ OUTPUT_VARIABLE ORT_GIT_BRANCH)
+ string(STRIP "${ORT_GIT_BRANCH}" ORT_GIT_BRANCH)
+ string(APPEND ORT_BUILD_INFO "git-branch=${ORT_GIT_BRANCH}, git-commit-id=${ORT_GIT_COMMIT}, ")
+endif()
+string(APPEND ORT_BUILD_INFO "build type=${CMAKE_BUILD_TYPE}")
+string(APPEND ORT_BUILD_INFO ", cmake cxx flags: ${CMAKE_CXX_FLAGS}")
configure_file(onnxruntime_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_config.h)
-if (WIN32)
+get_property(onnxruntime_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
+if (onnxruntime_GENERATOR_IS_MULTI_CONFIG)
configure_file(../requirements.txt.in ${CMAKE_CURRENT_BINARY_DIR}/Debug/requirements.txt)
configure_file(../requirements.txt.in ${CMAKE_CURRENT_BINARY_DIR}/Release/requirements.txt)
configure_file(../requirements.txt.in ${CMAKE_CURRENT_BINARY_DIR}/RelWithDebInfo/requirements.txt)
@@ -1271,9 +1381,9 @@ if (onnxruntime_USE_CUDA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_30,code=sm_30") # K series
endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12)
- # 37, 50 still work in CUDA 11 but are marked deprecated and will be removed in future CUDA version.
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_37,code=sm_37") # K80
- set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_50,code=sm_50") # M series
+ # 37, 50 still work in CUDA 11 but are marked deprecated and will be removed in future CUDA version.
+ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_37,code=sm_37") # K80
+ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_50,code=sm_50") # M series
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_52,code=sm_52") # M60
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60") # P series
@@ -1390,6 +1500,7 @@ if (UNIX AND onnxruntime_USE_MPI)
find_path(NCCL_INCLUDE_DIR
NAMES ${NCCL_LIBNAME}.h
HINTS
+ ${onnxruntime_NCCL_HOME}/include/rccl
${onnxruntime_NCCL_HOME}/include
$ENV{CUDA_ROOT}/include)
@@ -1556,7 +1667,7 @@ if (onnxruntime_BUILD_CSHARP)
list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_csharp)
endif()
-if (onnxruntime_BUILD_WEBASSEMBLY)
+if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
message(STATUS "WebAssembly Build is enabled")
list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_webassembly)
@@ -1627,3 +1738,31 @@ if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS)
COMMENT "Installing protobuf"
)
endif()
+
+if(TARGET onnxruntime)
+# Install
+ include(GNUInstallDirs)
+ include(CMakePackageConfigHelpers)
+ set(PROJECT_CONFIG_CONTENT "@PACKAGE_INIT@\n")
+ string(APPEND PROJECT_CONFIG_CONTENT
+ "include(\"\${CMAKE_CURRENT_LIST_DIR}/${PROJECT_NAME}Targets.cmake\")")
+ file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/PROJECT_CONFIG_FILE" ${PROJECT_CONFIG_CONTENT})
+ install(EXPORT ${PROJECT_NAME}Targets
+ NAMESPACE ${PROJECT_NAME}::
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME})
+# Create config for find_package()
+ configure_package_config_file(
+ "${CMAKE_CURRENT_BINARY_DIR}/PROJECT_CONFIG_FILE" ${PROJECT_NAME}Config.cmake
+ INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
+
+ write_basic_package_version_file(
+ "${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
+ VERSION ${ORT_VERSION}
+ COMPATIBILITY SameMajorVersion)
+
+ install(
+ FILES
+ "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
+ "${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake"
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
+endif()
diff --git a/cmake/Sdl.ruleset b/cmake/Sdl.ruleset
index 7ca26ad5d28ad..5ab2341f6a1df 100644
--- a/cmake/Sdl.ruleset
+++ b/cmake/Sdl.ruleset
@@ -184,7 +184,6 @@
-
diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake
index 58a9271d26e7f..e825bfeaea952 100644
--- a/cmake/adjust_global_compile_flags.cmake
+++ b/cmake/adjust_global_compile_flags.cmake
@@ -15,7 +15,7 @@ if (NOT MSVC AND NOT onnxruntime_ENABLE_BITCODE)
string(APPEND CMAKE_C_FLAGS " -ffunction-sections -fdata-sections")
endif()
-if (onnxruntime_BUILD_WEBASSEMBLY)
+if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
string(APPEND CMAKE_C_FLAGS " -s STRICT=1 -s DEFAULT_TO_CXX=1")
string(APPEND CMAKE_CXX_FLAGS " -s STRICT=1 -s DEFAULT_TO_CXX=1")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s ALLOW_UNIMPLEMENTED_SYSCALLS=1")
@@ -51,10 +51,8 @@ if (onnxruntime_BUILD_WEBASSEMBLY)
# Build WebAssembly with multi-threads support.
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
- string(APPEND CMAKE_C_FLAGS " -pthread")
- string(APPEND CMAKE_CXX_FLAGS " -pthread")
- string(APPEND CMAKE_C_FLAGS " -s USE_PTHREADS=1 -Wno-pthreads-mem-growth")
- string(APPEND CMAKE_CXX_FLAGS " -s USE_PTHREADS=1 -Wno-pthreads-mem-growth")
+ string(APPEND CMAKE_C_FLAGS " -pthread -Wno-pthreads-mem-growth")
+ string(APPEND CMAKE_CXX_FLAGS " -pthread -Wno-pthreads-mem-growth")
endif()
endif()
@@ -222,27 +220,30 @@ endmacro()
#Set global compile flags for all the source code(including third_party code like protobuf)
#This section must be before any add_subdirectory, otherwise build may fail because /MD,/MT mismatch
if (MSVC)
- enable_language(ASM_MASM)
- if (CMAKE_GENERATOR_PLATFORM)
+ if (CMAKE_VS_PLATFORM_NAME)
# Multi-platform generator
- set(onnxruntime_target_platform ${CMAKE_GENERATOR_PLATFORM})
+ set(onnxruntime_target_platform ${CMAKE_VS_PLATFORM_NAME})
else()
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
endif()
if (onnxruntime_target_platform STREQUAL "ARM64")
set(onnxruntime_target_platform "ARM64")
+ enable_language(ASM_MARMASM)
elseif (onnxruntime_target_platform STREQUAL "ARM64EC")
- set(onnxruntime_target_platform "ARM64EC")
+ enable_language(ASM_MARMASM)
elseif (onnxruntime_target_platform STREQUAL "ARM" OR CMAKE_GENERATOR MATCHES "ARM")
set(onnxruntime_target_platform "ARM")
+ enable_language(ASM_MARMASM)
elseif (onnxruntime_target_platform STREQUAL "x64" OR onnxruntime_target_platform STREQUAL "x86_64" OR onnxruntime_target_platform STREQUAL "AMD64" OR CMAKE_GENERATOR MATCHES "Win64")
set(onnxruntime_target_platform "x64")
+ enable_language(ASM_MASM)
elseif (onnxruntime_target_platform STREQUAL "Win32" OR onnxruntime_target_platform STREQUAL "x86" OR onnxruntime_target_platform STREQUAL "i386" OR onnxruntime_target_platform STREQUAL "i686")
set(onnxruntime_target_platform "x86")
- if (NOT onnxruntime_BUILD_WEBASSEMBLY)
- message("Enabling SAFESEH for x86 build")
- set(CMAKE_ASM_MASM_FLAGS "${CMAKE_ASM_MASM_FLAGS} /safeseh")
- endif()
+ enable_language(ASM_MASM)
+ message("Enabling SAFESEH for x86 build")
+ set(CMAKE_ASM_MASM_FLAGS "${CMAKE_ASM_MASM_FLAGS} /safeseh")
+ else()
+ message(FATAL_ERROR "Unknown CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
endif()
@@ -254,10 +255,7 @@ if (MSVC)
string(APPEND CMAKE_CXX_FLAGS " /wd26812")
string(APPEND CMAKE_C_FLAGS " /wd26812")
endif()
- # We do not treat 3rd-party libraries' warnings as errors. In order to do that, we need to add their header files locations to /external:I.
- # However, if a 3rd-party library was installed to a non-standard location and cmake find it and use it from there, you may see build errors
- # like: "error C2220: the following warning is treated as an error"
- string(APPEND CMAKE_CXX_FLAGS " /experimental:external /external:W0 /external:templates- /external:I ${CMAKE_CURRENT_SOURCE_DIR} /external:I ${CMAKE_CURRENT_BINARY_DIR}")
+
if (onnxruntime_USE_AVX)
string(APPEND CMAKE_CXX_FLAGS " /arch:AVX")
string(APPEND CMAKE_C_FLAGS " /arch:AVX")
@@ -321,6 +319,11 @@ else()
string(APPEND CMAKE_CXX_FLAGS " -g -O0 --coverage ")
string(APPEND CMAKE_C_FLAGS " -g -O0 --coverage ")
endif()
+ if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
+ # suppress warnings from flatbuffers
+ string(APPEND CMAKE_CXX_FLAGS " -Wno-restrict ")
+ string(APPEND CMAKE_C_FLAGS " -Wno-restrict ")
+ endif()
# Check support for AVX and f16c.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORT_MF16C)
@@ -354,14 +357,12 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
#For Mac compliance
message("Adding flags for Mac builds")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong")
-endif()
-
-if (WIN32)
+elseif (WIN32)
# parallel build
# These compiler opitions cannot be forwarded to NVCC, so cannot use add_compiler_options
string(APPEND CMAKE_CXX_FLAGS " /MP")
# required to be set explicitly to enable Eigen-Unsupported SpecialFunctions
string(APPEND CMAKE_CXX_FLAGS " -DEIGEN_HAS_C99_MATH")
-elseif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+else()
add_compile_definitions("_GNU_SOURCE")
endif()
diff --git a/cmake/deps.txt b/cmake/deps.txt
index d0a5cf7aa4800..279b5ca649dba 100644
--- a/cmake/deps.txt
+++ b/cmake/deps.txt
@@ -5,42 +5,43 @@
#URLs can be either https URLs or local file paths in cmake-style(directory separator is a forward slash character).
#SHA1 hashes can be generated by running sha1sum command.
#If you need to change abseil's version to a different one, you may also want to update external\abseil-cpp.natvis
-#since the file contains a version string: "lts_20220623". However, the file is for debugging purposes only and would
+#since the file contains a version string: "lts_20230802". However, the file is for debugging purposes only and would
#not affect built binaries.
-abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20220623.1.zip;50c137c88965cba015dfcc8fd5d9b46d23146751
+abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.0.zip;04271dfbfac59269b6939e1e9d5faf0d18a7ba91
cxxopts;https://github.com/jarro2783/cxxopts/archive/3c73d91c0b04e2b59462f0a741be8c07024c1bc0.zip;6c6ca7f8480b26c8d00476e0e24b7184717fe4f0
-date;https://github.com/HowardHinnant/date/archive/refs/tags/v2.4.1.zip;ea99f021262b1d804a872735c658860a6a13cc98
+date;https://github.com/HowardHinnant/date/archive/refs/tags/v3.0.1.zip;2dac0c81dc54ebdd8f8d073a75c053b04b56e159
dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b31321e5549591d78aa7f377173445
+eigen;https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip;ee201b07085203ea7bd8eb97cbcb31b07cfa3efb
flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v1.12.0.zip;ba0a75fd12dbef8f6557a74e611b7a3d0c5fe7bf
fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.7.0.zip;e97c368b176e8614e3f1bf13dd9abcf6a7ad9908
-google_nsync;https://github.com/google/nsync/archive/refs/tags/1.23.0.zip;f3233450cf7156fc0bedd1b0e884eddec264897c
-googletest;https://github.com/google/googletest/archive/519beb0e52c842729b4b53731d27c0e0c32ab4a2.zip;4b3c37972e4c1bef1185d46f702082f8772ee73f
+google_nsync;https://github.com/google/nsync/archive/refs/tags/1.26.0.zip;5e7c00ef6bf5b787386fc040067903ec774e2752
+googletest;https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip;0ac421f2ec11af38b0fff0f1992184032731a8bc
googlexnnpack;https://github.com/google/XNNPACK/archive/003c580e696a774afdc984996ee909b7c8d8128c.zip;9f192e3f15e1e37ae9c78d53eeea47e45c5eb31c
json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c
microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14
-microsoft_wil;https://github.com/microsoft/wil/archive/5f4caba4e7a9017816e47becdd918fcc872039ba.zip;fd119887d0d17c37adf1fc227b054befa28158ad
+microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
-mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.79.0.zip;c8f04e378535ededbe5af52c8f969d2dedbe73d5
-onnx;https://github.com/onnx/onnx/archive/3b58938e025c41d2fcd89fa22028eefaa81a18ad.zip;e0e5dda9eea5cd5ecae3bd8be86e477016b6be02
-#use the last commit of 8.6-EA branch (https://github.com/onnx/onnx-tensorrt/commit/ba6a4fb34fdeaa3613bf981610c657e7b663a699)
-onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/ba6a4fb34fdeaa3613bf981610c657e7b663a699.zip;5a474ed86e2c4ee4085d3daeff8222866e933dc0
+mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
+onnx;https://github.com/onnx/onnx/archive/e2525550194ce3d8a2c4a3af451c9d9b3ae6650e.zip;782f23d788185887f520a90535513e244218e928
+#use the commit of supporting all the plugins and TRT 8.6-GA (https://github.com/onnx/onnx-tensorrt/commit/0462dc31ae78f48744b6141ae376df1f96d3f459)
+onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/0462dc31ae78f48744b6141ae376df1f96d3f459.zip;5ff086361956cceb81ed17453a1fd8db2aa4328d
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa
+protoc_win64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip;b4521f7ada5b260380f94c4bd7f1b7684c76969a
+protoc_win32;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win32.zip;3688010318192c46ce73213cdfb6b3e5656da874
+protoc_linux_x64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-linux-x86_64.zip;338462004aa5be9fba45b35b5b4be43f69b47a90
+protoc_linux_x86;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-linux-x86_32.zip;61fdbe7d6360e065ec6fea23bca2cca673115fb8
+protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-linux-aarch_64.zip;df9d45470b0b8cf939dd2f0ec6b88e9cafc4d617
+protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
pthreadpool;https://github.com/Maratyszcza/pthreadpool/archive/1787867f6183f056420e532eec640cba25efafea.zip;e43e80781560c5ab404a4da20f34d846f5f5d101
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip;769b6aa67a77f17a770960f604b727645b6f6a13
-pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/5916273f79a21551890fd3d56fc5375a78d1598d.zip;2be4d2ae321fada97cb39eaf4eeba5f8c85597cf
+pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/959002f82d7962a473d8bf301845f2af720e0aa4.zip;85da3caa60eb2b148613b443fbc2bfdc30689965
re2;https://github.com/google/re2/archive/refs/tags/2022-06-01.zip;aa77313b76e91b531ee7f3e45f004c6a502a5374
-safeint;https://github.com/dcleblanc/SafeInt/archive/ff15c6ada150a5018c5ef2172401cb4529eac9c0.zip;913a4046e5274d329af2806cb53194f617d8c0ab
+safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.0.0.zip;0f95b3c1fc1bd1175c4a90b2c9e39074d1bccefd
-# below are deps introduced by triton client, might remove after 1.14 release
-openssl;https://github.com/openssl/openssl/archive/refs/tags/openssl-3.0.7.zip;dda8fc81308555410505eb4a9eab3e1da0436a1d
-rapidjson;https://github.com/Tencent/rapidjson/archive/refs/tags/v1.1.0.zip;0fe7b4f7b83df4b3d517f4a202f3a383af7a0818
-boost;https://github.com/boostorg/boost/archive/refs/tags/boost-1.81.0.zip;f6ab0da855f825b4eb1abd949967d01a4c5e4e1b
-b64;https://github.com/libb64/libb64/archive/refs/tags/v2.0.0.1.zip;815b6d31d50d9e63df55b25ce555e7b787153c28
-pthread;https://sourceforge.net/projects/pthreads4w/files/pthreads4w-code-v3.0.0.zip;3b9e417e4474c34542b76ad40529e396ac109fb4
-triton;https://github.com/triton-inference-server/server/archive/refs/tags/v2.28.0.zip;4b305570aa1e889946e20e36050b6770e4108fee
-# above are deps introduced by triton client, might remove after 1.14 release
-extensions;https://github.com/microsoft/onnxruntime-extensions/archive/81e7799c69044c745239202085eb0a98f102937b.zip;d53487035174a046628359289ad27aa0ac0380c9
+utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156
+extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
+composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/d52ec01652b7d620386251db92455968d8d90bdc.zip;6b5ce8edf3625f8817086c194fbf94b664e1b0e0
\ No newline at end of file
diff --git a/cmake/external/abseil-cpp.cmake b/cmake/external/abseil-cpp.cmake
index 54d2f9c5c19df..3bcd4109e2888 100644
--- a/cmake/external/abseil-cpp.cmake
+++ b/cmake/external/abseil-cpp.cmake
@@ -6,15 +6,20 @@ include(FetchContent)
# Pass to build
set(ABSL_PROPAGATE_CXX_STD 1)
set(BUILD_TESTING 0)
-
+set(ABSL_BUILD_TESTING OFF)
+set(ABSL_BUILD_TEST_HELPERS OFF)
+set(ABSL_USE_EXTERNAL_GOOGLETEST ON)
if(Patch_FOUND AND WIN32)
set(ABSL_PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/abseil/absl_windows.patch)
else()
set(ABSL_PATCH_COMMAND "")
endif()
-
+if(WIN32 AND NOT Patch_FOUND)
+ #see https://github.com/google/re2/issues/425 and https://github.com/google/re2/issues/436
+ set(ABSL_ENABLE_INSTALL ON)
+endif()
# NB! Advancing Abseil version changes its internal namespace,
-# currently absl::lts_20211102 which affects abseil-cpp.natvis debugger
+# currently absl::lts_20230125 which affects abseil-cpp.natvis debugger
# visualization file, that must be adjusted accordingly, unless we eliminate
# that namespace at build time.
FetchContent_Declare(
@@ -22,6 +27,7 @@ FetchContent_Declare(
URL ${DEP_URL_abseil_cpp}
URL_HASH SHA1=${DEP_SHA1_abseil_cpp}
PATCH_COMMAND ${ABSL_PATCH_COMMAND}
+ FIND_PACKAGE_ARGS NAMES absl
)
onnxruntime_fetchcontent_makeavailable(abseil_cpp)
@@ -37,8 +43,26 @@ if (GDK_PLATFORM)
target_compile_definitions(absl_symbolize PRIVATE WINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP)
endif()
-if(NOT onnxruntime_DISABLE_ABSEIL)
- set(ABSEIL_LIBS absl::inlined_vector absl::flat_hash_set
- absl::flat_hash_map absl::node_hash_set absl::node_hash_map absl::base absl::throw_delegate absl::raw_hash_set
- absl::hash absl::city absl::low_level_hash absl::raw_logging_internal)
-endif()
\ No newline at end of file
+# TODO: since multiple ORT's dependencies depend on Abseil, the list below would vary from version to version.
+# We'd better to not manually manage the list.
+set(ABSEIL_LIBS absl::base
+absl::city
+absl::core_headers
+absl::fixed_array
+absl::flags
+absl::flat_hash_map
+absl::flat_hash_set
+absl::hash
+absl::inlined_vector
+absl::low_level_hash
+absl::node_hash_map
+absl::node_hash_set
+absl::optional
+absl::raw_hash_set
+absl::raw_logging_internal
+absl::span
+absl::str_format
+absl::strings
+absl::synchronization
+absl::throw_delegate
+absl::time)
diff --git a/cmake/external/abseil-cpp.natvis b/cmake/external/abseil-cpp.natvis
index e0294ba6f7b55..e923d5862ec2e 100644
--- a/cmake/external/abseil-cpp.natvis
+++ b/cmake/external/abseil-cpp.natvis
@@ -1,6 +1,6 @@
-
+
@@ -24,7 +24,7 @@
-
+
empty
{{ size={size_} }}
@@ -44,7 +44,7 @@
-
+
{{ {value.first}:{value.second} }}
- value.first
diff --git a/cmake/external/composable_kernel.cmake b/cmake/external/composable_kernel.cmake
index fe57a5b5325e2..7168cd1a22c53 100644
--- a/cmake/external/composable_kernel.cmake
+++ b/cmake/external/composable_kernel.cmake
@@ -1,23 +1,24 @@
-set(composable_kernel_URL https://github.com/ROCmSoftwarePlatform/composable_kernel.git)
-set(composable_kernel_TAG ed3a2e52265e11daa366f47b082141a652b67c58) # 2023-04-10 21:02:17 +0800
-
set(PATCH ${PROJECT_SOURCE_DIR}/patches/composable_kernel/Fix_Clang_Build.patch)
include(FetchContent)
FetchContent_Declare(composable_kernel
- GIT_REPOSITORY ${composable_kernel_URL}
- GIT_TAG ${composable_kernel_TAG}
- PATCH_COMMAND git apply --reverse --check ${PATCH} || git apply --ignore-space-change --ignore-whitespace ${PATCH}
+ URL ${DEP_URL_composable_kernel}
+ URL_HASH SHA1=${DEP_SHA1_composable_kernel}
+ PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PATCH}
)
FetchContent_GetProperties(composable_kernel)
if(NOT composable_kernel_POPULATED)
FetchContent_Populate(composable_kernel)
set(BUILD_DEV OFF CACHE BOOL "Disable -Weverything, otherwise, error: 'constexpr' specifier is incompatible with C++98 [-Werror,-Wc++98-compat]" FORCE)
+ # Exclude i8 device gemm instances due to excessive long compilation time and not being used
+ set(DTYPES fp32 fp16 bf16)
+ set(INSTANCES_ONLY ON)
add_subdirectory(${composable_kernel_SOURCE_DIR} ${composable_kernel_BINARY_DIR} EXCLUDE_FROM_ALL)
add_library(onnxruntime_composable_kernel_includes INTERFACE)
target_include_directories(onnxruntime_composable_kernel_includes INTERFACE
${composable_kernel_SOURCE_DIR}/include
${composable_kernel_SOURCE_DIR}/library/include)
+ target_compile_definitions(onnxruntime_composable_kernel_includes INTERFACE __fp32__ __fp16__ __bf16__)
endif()
diff --git a/cmake/external/cutlass.cmake b/cmake/external/cutlass.cmake
index 18ac668bb1592..8c5d81d638ced 100644
--- a/cmake/external/cutlass.cmake
+++ b/cmake/external/cutlass.cmake
@@ -1,4 +1,4 @@
-if (onnxruntime_USE_FLASH_ATTENTION)
+if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
include(FetchContent)
FetchContent_Declare(
cutlass
diff --git a/cmake/external/dml.cmake b/cmake/external/dml.cmake
index 78813fa4498cc..5d25b9529e030 100644
--- a/cmake/external/dml.cmake
+++ b/cmake/external/dml.cmake
@@ -20,6 +20,7 @@
set(onnxruntime_USE_CUSTOM_DIRECTML OFF CACHE BOOL "Depend on a custom/internal build of DirectML.")
set(dml_EXTERNAL_PROJECT OFF CACHE BOOL "Build DirectML as a source dependency.")
+set(DML_SHARED_LIB DirectML.dll)
if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
if (NOT(MSVC) OR NOT(WIN32))
@@ -40,8 +41,7 @@ if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
set(NUGET_CONFIG ${PROJECT_SOURCE_DIR}/../NuGet.config)
set(PACKAGES_CONFIG ${PROJECT_SOURCE_DIR}/../packages.config)
get_filename_component(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/../packages ABSOLUTE)
- set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.10.1)
- set(DML_SHARED_LIB DirectML.dll)
+ set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.12.1)
# Restore nuget packages, which will pull down the DirectML redist package.
add_custom_command(
@@ -72,12 +72,12 @@ else()
if (dml_EXTERNAL_PROJECT)
set(dml_preset_config $,debug,release>)
set(dml_preset_name ${onnxruntime_target_platform}-win-redist-${dml_preset_config})
-
+ target_compile_definitions(DirectML INTERFACE DML_TARGET_VERSION_USE_LATEST=1)
include(ExternalProject)
ExternalProject_Add(
directml_repo
GIT_REPOSITORY https://dev.azure.com/microsoft/WindowsAI/_git/DirectML
- GIT_TAG 2290bd6495fdf8c35822816213516d13f3742cc9
+ GIT_TAG d460f0f46967bea878786f1bed69487692c779bf
GIT_SHALLOW OFF # not allowed when GIT_TAG is a commit SHA, which is preferred (it's stable, unlike branches)
GIT_PROGRESS ON
BUILD_IN_SOURCE ON
@@ -89,11 +89,13 @@ else()
# Target that consumers can use to link with the internal build of DirectML.
set(directml_install_path ${CMAKE_BINARY_DIR}/directml_repo-prefix/src/directml_repo/build/${dml_preset_name}/install)
+ set(DML_PACKAGE_DIR ${directml_install_path})
add_library(DirectML INTERFACE)
target_link_libraries(DirectML INTERFACE ${directml_install_path}/lib/DirectML.lib)
add_dependencies(DirectML directml_repo-install)
include_directories(BEFORE ${directml_install_path}/include)
else()
- include_directories(${dml_INCLUDE_DIR})
+ include_directories(BEFORE ${dml_INCLUDE_DIR})
+ set(DML_PACKAGE_DIR ${dml_INCLUDE_DIR}/..)
endif()
endif()
diff --git a/cmake/external/eigen b/cmake/external/eigen
deleted file mode 160000
index d10b27fe37736..0000000000000
--- a/cmake/external/eigen
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit d10b27fe37736d2944630ecd7557cefa95cf87c9
diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake
index 266dd534af64a..c0f7ddc50eb98 100644
--- a/cmake/external/eigen.cmake
+++ b/cmake/external/eigen.cmake
@@ -6,15 +6,17 @@ if (onnxruntime_USE_PREINSTALLED_EIGEN)
else ()
if (onnxruntime_USE_ACL)
FetchContent_Declare(
- eigen
- URL https://gitlab.com/libeigen/eigen/-/archive/d10b27fe37736d2944630ecd7557cefa95cf87c9/eigen-d10b27fe37736d2944630ecd7557cefa95cf87c9.zip
- PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-space-change --ignore-whitespace < ${PROJECT_SOURCE_DIR}/patches/eigen/Fix_Eigen_Build_Break.patch
- )
+ eigen
+ URL ${DEP_URL_eigen}
+ URL_HASH SHA1=${DEP_SHA1_eigen}
+ PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-space-change --ignore-whitespace < ${PROJECT_SOURCE_DIR}/patches/eigen/Fix_Eigen_Build_Break.patch
+ )
else()
FetchContent_Declare(
- eigen
- URL https://gitlab.com/libeigen/eigen/-/archive/d10b27fe37736d2944630ecd7557cefa95cf87c9/eigen-d10b27fe37736d2944630ecd7557cefa95cf87c9.zip
- )
+ eigen
+ URL ${DEP_URL_eigen}
+ URL_HASH SHA1=${DEP_SHA1_eigen}
+ )
endif()
FetchContent_Populate(eigen)
set(eigen_INCLUDE_DIRS "${eigen_SOURCE_DIR}")
diff --git a/cmake/external/emsdk b/cmake/external/emsdk
index 0ab19024f08c6..a896e3d066448 160000
--- a/cmake/external/emsdk
+++ b/cmake/external/emsdk
@@ -1 +1 @@
-Subproject commit 0ab19024f08c6673a713e454ef8bd95e174c807f
+Subproject commit a896e3d066448b3530dbcaa48869fafefd738f57
diff --git a/cmake/external/extensions.cmake b/cmake/external/extensions.cmake
index 5039929062445..68796ad02d982 100644
--- a/cmake/external/extensions.cmake
+++ b/cmake/external/extensions.cmake
@@ -22,7 +22,8 @@ if (onnxruntime_REDUCED_OPS_BUILD)
endif()
if (onnxruntime_WEBASSEMBLY_DEFAULT_EXTENSION_FLAGS)
- set(OCOS_ENABLE_SPM_TOKENIZER ON CACHE INTERNAL "")
+ #The generated protobuf files in ORT-extension needs be updated to work with the current protobuf version ORT is using.
+ set(OCOS_ENABLE_SPM_TOKENIZER OFF CACHE INTERNAL "")
set(OCOS_ENABLE_GPT2_TOKENIZER ON CACHE INTERNAL "")
set(OCOS_ENABLE_WORDPIECE_TOKENIZER ON CACHE INTERNAL "")
set(OCOS_ENABLE_BERT_TOKENIZER ON CACHE INTERNAL "")
@@ -54,9 +55,11 @@ endif()
target_include_directories(ocos_operators PRIVATE ${RE2_INCLUDE_DIR} ${json_SOURCE_DIR}/include)
target_include_directories(ortcustomops PUBLIC ${onnxruntime_EXTENSIONS_PATH}/includes)
if(OCOS_ENABLE_SPM_TOKENIZER)
- onnxruntime_add_include_to_target(sentencepiece-static ${PROTOBUF_LIB})
+ onnxruntime_add_include_to_target(sentencepiece-static ${PROTOBUF_LIB} ${ABSEIL_LIBS})
endif()
-onnxruntime_add_include_to_target(ocos_operators ${PROTOBUF_LIB})
+onnxruntime_add_include_to_target(ocos_operators ${PROTOBUF_LIB} ${ABSEIL_LIBS})
+onnxruntime_add_include_to_target(noexcep_operators ${PROTOBUF_LIB} ${ABSEIL_LIBS})
+
add_dependencies(ocos_operators ${onnxruntime_EXTERNAL_DEPENDENCIES})
add_dependencies(ortcustomops ${onnxruntime_EXTERNAL_DEPENDENCIES})
diff --git a/cmake/external/onnx b/cmake/external/onnx
index 9b7bca2a723ff..e2525550194ce 160000
--- a/cmake/external/onnx
+++ b/cmake/external/onnx
@@ -1 +1 @@
-Subproject commit 9b7bca2a723ff94edcd007d93b5d0cf1838591dc
+Subproject commit e2525550194ce3d8a2c4a3af451c9d9b3ae6650e
diff --git a/cmake/external/onnx_minimal.cmake b/cmake/external/onnx_minimal.cmake
index cf9429c185b46..65ff3fb148b11 100644
--- a/cmake/external/onnx_minimal.cmake
+++ b/cmake/external/onnx_minimal.cmake
@@ -27,18 +27,6 @@ target_compile_definitions(onnx_proto PUBLIC $
+
+ {{ { *(std::string*)((uintptr_t)(ptr_) & ~0x3) } }}
+
+
+
+
- {{ {key_.tagged_ptr_}:{value_.tagged_ptr_} }}
+ {{ {_impl_.key_.tagged_ptr_}:{_impl_.value_.tagged_ptr_} }}
- - key_.tagged_ptr_
- - value_.tagged_ptr_
+ - _impl_.key_.tagged_ptr_
+ - _impl_.value_.tagged_ptr_
-
+
- {{ tensor_name={tensor_name_.tagged_ptr_} }}
+ {{ tensor_name={_impl_.tensor_name_.tagged_ptr_} }}
- - tensor_name_.tagged_ptr_
- - quant_parameter_tensor_names_
+ - _impl_.tensor_name_.tagged_ptr_
+ - _impl_.quant_parameter_tensor_names_
-
- {{ name={name_.tagged_ptr_} }}
+
+ {{ name={_impl_.name_.tagged_ptr_} }}
- - name_.tagged_ptr_
- - type_
+ - _impl_.name_.tagged_ptr_
+ - _impl_.type_
- _has_type()
- {{ domain={ domain_.tagged_ptr_ }, version={ version_ } }}
+ {{ domain={ _impl_.domain_.tagged_ptr_ }, version={ _impl_.version_ } }}
-
-
-
-
- {{ name={ name_.tagged_ptr_ }, type={ (AttributeProto_AttributeType)type_ } }}
+
+
+
+
+ {{ name={ _impl_.name_.tagged_ptr_ }, type={ (AttributeProto_AttributeType)_impl_.type_ } }}
- - name_.tagged_ptr_
- - (AttributeProto_AttributeType)type_
- - i_
- - ints_
- - f_
- - floats_
- - s_.tagged_ptr_
- - strings_
- - t_
- - tensors_
- - t_
- - graphs_
- - t_
- - type_protos_
- - sparse_tensor_
- - sparse_tensors_
- - ref_attr_name_.tagged_ptr_
- - doc_string_.tagged_ptr_
+ - _impl_.name_.tagged_ptr_
+ - (AttributeProto_AttributeType)_impl_.type_
+ - _impl_.i_
+ - _impl_.ints_
+ - _impl_.f_
+ - _impl_.floats_
+ - _impl_.s_.tagged_ptr_
+ - _impl_.strings_
+ - _impl_.t_
+ - _impl_.tensors_
+ - _impl_.g_
+ - _impl_.graphs_
+ - _impl_.tp_
+ - _impl_.type_protos_
+ - _impl_.sparse_tensor_
+ - _impl_.sparse_tensors_
+ - _impl_.ref_attr_name_.tagged_ptr_
+ - _impl_.doc_string_.tagged_ptr_
- _has_tensor()
- _has_graph()
- _has_type_proto()
@@ -102,188 +108,191 @@
- {{ name={ name_.tagged_ptr_ }, domain={ domain_.tagged_ptr_ } }}
+ {{ name={ _impl_.name_.tagged_ptr_ }, domain={ _impl_.domain_.tagged_ptr_ } }}
- - name_.tagged_ptr_
- - domain_.tagged_ptr_
- - op_type_.tagged_ptr_
- - input_
- - output_
- - attribute_
+ - _impl_.name_.tagged_ptr_
+ - _impl_.domain_.tagged_ptr_
+ - _impl_.op_type_.tagged_ptr_
+ - _impl_.input_
+ - _impl_.output_
+ - _impl_.attribute_
-
+
- {{ name={ name_.tagged_ptr_ }, domain={ domain_.tagged_ptr_ } }}
+ {{ name={ _impl_.name_.tagged_ptr_ } }}
- - opset_import_
- - input_
- - output_
- - node_
- - doc_string_.tagged_ptr_
+ - _impl_.input_
+ - _impl_.output_
+ - _impl_.node_
+ - _impl_.attribute_
+ - _impl_.attribute_proto_
+ - _impl_.doc_string_.tagged_ptr_
+ - _impl_.opset_import_
+ - _impl_.domain_.tagged_ptr_
-
+
- {{ name={ name_.tagged_ptr_ } }}
+ {{ name={ _impl_.name_.tagged_ptr_ } }}
- - name_.tagged_ptr_
- - value_info_
- - input_
- - output_
- - node_
- - initializer_
- - sparse_initializer_
- - doc_string_.tagged_ptr_
+ - _impl_.name_.tagged_ptr_
+ - _impl_.value_info_
+ - _impl_.input_
+ - _impl_.output_
+ - _impl_.node_
+ - _impl_.initializer_
+ - _impl_.sparse_initializer_
+ - _impl_.doc_string_.tagged_ptr_
-
+
- {{ producer={producer_name_.tagged_ptr_}, domain={ domain_.tagged_ptr_ } }}
+ {{ producer={_impl_.producer_name_.tagged_ptr_}, domain={ _impl_.domain_.tagged_ptr_ } }}
- - opset_import_
- - metadata_props_
- - producer_version_.tagged_ptr_
- - ir_version_
- - model_version_
- - doc_string_.tagged_ptr_
- - graph_
+ - _impl_.opset_import_
+ - _impl_.metadata_props_
+ - _impl_.producer_version_.tagged_ptr_
+ - _impl_.ir_version_
+ - _impl_.model_version_
+ - _impl_.doc_string_.tagged_ptr_
+ - _impl_.graph_
-
+
- empty
- {{ v = {value_.dim_value_} }}
- {{ p = {value_.dim_param_.tagged_ptr_} }}
+ empty
+ {{ v = {_impl_.value_.dim_value_} }}
+ {{ p = {_impl_.value_.dim_param_.tagged_ptr_} }}
- {{ size={ dim_.current_size_ } }}
+ {{ size={ _impl_.dim_.current_size_ } }}
- - dim_
+ - _impl_.dim_
-
+
- {{ type={ (ValueCase)*_oneof_case_ } }}
+ {{ type={ (ValueCase)*_impl_._oneof_case_ } }}
- - (ValueCase)*_oneof_case_
- - value_.tensor_type_
- - value_.sequence_type_
- - value_.map_type_
- - value_.sparse_tensor_type_
- - value_.optional_type_
- - denotation_.tagged_ptr_
+ - (ValueCase)*_impl_._oneof_case_
+ - _impl_.value_.tensor_type_
+ - _impl_.value_.sequence_type_
+ - _impl_.value_.map_type_
+ - _impl_.value_.sparse_tensor_type_
+ - _impl_.value_.optional_type_
+ - _impl_.denotation_.tagged_ptr_
-
+
-
- {{ elem_type={ (TensorProto_DataType)elem_type_ } }}
+
+ {{ elem_type={ (TensorProto_DataType)_impl_.elem_type_ } }}
- - (TensorProto_DataType)elem_type_
- - shape_
+ - (TensorProto_DataType)_impl_.elem_type_
+ - _impl_.shape_
- _has_shape()
-
+
-
- {{ elem_type={ *elem_type_ } }}
+
+ {{ elem_type={ *_impl_.elem_type_ } }}
- - elem_type_
+ - _impl_.elem_type_
- _has_element_type()
-
+
-
- {{ key_type={ (TensorProto_DataType)key_type_ } }}
+
+ {{ key_type={ (TensorProto_DataType)_impl_.key_type_ } }}
- - (TensorProto_DataType)key_type_
- - value_type_
+ - (TensorProto_DataType)_impl_.key_type_
+ - _impl_.value_type_
- _has_value_type()
-
- {{ elem_type={ (TensorProto_DataType)elem_type_ } }}
+
+ {{ elem_type={ (TensorProto_DataType)_impl_.elem_type_ } }}
- - (TensorProto_DataType)elem_type_
- - shape_
+ - (TensorProto_DataType)_impl_.elem_type_
+ - _impl_.shape_
- _has_shape()
-
- {{ elem_type={ *elem_type_ } }}
+
+ {{ elem_type={ *_impl_.elem_type_ } }}
- - elem_type_
+ - _impl_.elem_type_
- _has_element_type()
- {{ begin={ begin_ }, end={ end_ } }}
+ {{ begin={ _impl_.begin_ }, end={ _impl_.end_ } }}
-
+
-
-
-
-
-
+
+
+
+
+
- {{ name={name_.tagged_ptr_}, data_type={ (TensorProto_DataType)data_type_ } }}
+ {{ name={_impl_.name_.tagged_ptr_}, data_type={ (TensorProto_DataType)_impl_.data_type_ } }}
- - name_.tagged_ptr_
- - (TensorProto_DataType)data_type_
+ - _impl_.name_.tagged_ptr_
+ - (TensorProto_DataType)_impl_.data_type_
- 1
- _shape_size_1()
- _shape_size_2()
- _shape_size_3()
- _shape_size_4()
- _shape_size_5()
- - dims_
- - float_data_
- - int32_data_
- - int32_data_
- - int32_data_
- - int32_data_
- - int64_data_
- - int64_data_
- - uint64_data_
- - double_data_
- - string_data_
- - (std::string*)(raw_data_.tagged_ptr_.ptr_)
- - (TensorProto_DataLocation) data_location_
- - external_data_
+ - _impl_.dims_
+ - _impl_.float_data_
+ - _impl_.int32_data_
+ - _impl_.int32_data_
+ - _impl_.int32_data_
+ - _impl_.int32_data_
+ - _impl_.int64_data_
+ - _impl_.int64_data_
+ - _impl_.uint64_data_
+ - _impl_.double_data_
+ - _impl_.string_data_
+ - _impl_.raw_data_.tagged_ptr_
+ - (TensorProto_DataLocation) _impl_.data_location_
+ - _impl_.external_data_
- _has_raw_data()
- _has_data_location()
- _has_segment()
-
+
-
-
+
+
{{ SparseTensorProto }}
- - dims_
- - values_
- - indices_
+ - _impl_.dims_
+ - _impl_.values_
+ - _impl_.indices_
- _has_values()
- _has_indices()
diff --git a/cmake/external/onnxruntime-extensions b/cmake/external/onnxruntime-extensions
deleted file mode 160000
index 81e7799c69044..0000000000000
--- a/cmake/external/onnxruntime-extensions
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 81e7799c69044c745239202085eb0a98f102937b
diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
index 0c0dea7df2a82..e1671bcf43ed9 100644
--- a/cmake/external/onnxruntime_external_deps.cmake
+++ b/cmake/external/onnxruntime_external_deps.cmake
@@ -19,11 +19,10 @@ endforeach()
message("Loading Dependencies ...")
# ABSL should be included before protobuf because protobuf may use absl
-if(NOT onnxruntime_DISABLE_ABSEIL)
- include(external/abseil-cpp.cmake)
-endif()
+include(external/abseil-cpp.cmake)
set(RE2_BUILD_TESTING OFF CACHE BOOL "" FORCE)
+
FetchContent_Declare(
re2
URL ${DEP_URL_re2}
@@ -34,19 +33,17 @@ FetchContent_Declare(
if (onnxruntime_BUILD_UNIT_TESTS)
# WebAssembly threading support in Node.js is still an experimental feature and
# not working properly with googletest suite.
- if (onnxruntime_BUILD_WEBASSEMBLY)
+ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(gtest_disable_pthreads ON)
endif()
set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
- if(NOT onnxruntime_DISABLE_ABSEIL)
- # It uses both ABSL and re2
- set(GTEST_HAS_ABSL OFF CACHE BOOL "" FORCE)
- endif()
+ # Set it to ON will cause crashes in onnxruntime_test_all when onnxruntime_USE_CUDA is ON
+ set(GTEST_HAS_ABSL OFF CACHE BOOL "" FORCE)
# gtest and gmock
FetchContent_Declare(
googletest
URL ${DEP_URL_googletest}
- FIND_PACKAGE_ARGS NAMES GTest
+ FIND_PACKAGE_ARGS 1.14.0...<2.0.0 NAMES GTest
URL_HASH SHA1=${DEP_SHA1_googletest}
)
endif()
@@ -84,22 +81,74 @@ FetchContent_Declare(
# Flatbuffers
# We do not need to build flatc for iOS or Android Cross Compile
-if (CMAKE_SYSTEM_NAME STREQUAL "iOS" OR CMAKE_SYSTEM_NAME STREQUAL "Android" OR onnxruntime_BUILD_WEBASSEMBLY)
+if (CMAKE_SYSTEM_NAME STREQUAL "iOS" OR CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(FLATBUFFERS_BUILD_FLATC OFF CACHE BOOL "FLATBUFFERS_BUILD_FLATC" FORCE)
endif()
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "FLATBUFFERS_BUILD_TESTS" FORCE)
set(FLATBUFFERS_INSTALL OFF CACHE BOOL "FLATBUFFERS_INSTALL" FORCE)
set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "FLATBUFFERS_BUILD_FLATHASH" FORCE)
set(FLATBUFFERS_BUILD_FLATLIB ON CACHE BOOL "FLATBUFFERS_BUILD_FLATLIB" FORCE)
+if(Patch_FOUND)
+ set(ONNXRUNTIME_FLATBUFFERS_PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/flatbuffers/flatbuffers.patch)
+else()
+ set(ONNXRUNTIME_FLATBUFFERS_PATCH_COMMAND "")
+endif()
#flatbuffers 1.11.0 does not have flatbuffers::IsOutRange, therefore we require 1.12.0+
FetchContent_Declare(
flatbuffers
URL ${DEP_URL_flatbuffers}
URL_HASH SHA1=${DEP_SHA1_flatbuffers}
+ PATCH_COMMAND ${ONNXRUNTIME_FLATBUFFERS_PATCH_COMMAND}
FIND_PACKAGE_ARGS 1.12.0...<2.0.0 NAMES Flatbuffers
)
+# Download a protoc binary from Internet if needed
+if(CMAKE_CROSSCOMPILING AND NOT ONNX_CUSTOM_PROTOC_EXECUTABLE)
+ # This part of code is only for users' convenience. The code couldn't handle all cases. Users always can manually
+ # download protoc from Protobuf's Github release page and pass the local path to the ONNX_CUSTOM_PROTOC_EXECUTABLE
+ # variable.
+ message("CMAKE_HOST_SYSTEM_NAME: ${CMAKE_HOST_SYSTEM_NAME}")
+ if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
+ if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "AMD64")
+ FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_win64} URL_HASH SHA1=${DEP_SHA1_protoc_win64})
+ FetchContent_Populate(protoc_binary)
+ elseif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86")
+ FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_win32} URL_HASH SHA1=${DEP_SHA1_protoc_win32})
+ FetchContent_Populate(protoc_binary)
+ endif()
+ if(protoc_binary_SOURCE_DIR)
+ message("Use prebuilt protoc")
+ set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${protoc_binary_SOURCE_DIR}/bin/protoc.exe)
+ set(PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE})
+ endif()
+ elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux")
+ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(x86_64|amd64)$")
+ FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_x64} URL_HASH SHA1=${DEP_SHA1_protoc_linux_x64})
+ FetchContent_Populate(protoc_binary)
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86?)$")
+ FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_x86} URL_HASH SHA1=${DEP_SHA1_protoc_linux_x86})
+ FetchContent_Populate(protoc_binary)
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64.*")
+ FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_linux_aarch64} URL_HASH SHA1=${DEP_SHA1_protoc_linux_aarch64})
+ FetchContent_Populate(protoc_binary)
+ endif()
+ if(protoc_binary_SOURCE_DIR)
+ message("Use prebuilt protoc")
+ set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${protoc_binary_SOURCE_DIR}/bin/protoc)
+ set(PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE})
+ endif()
+ elseif ((CMAKE_SYSTEM_NAME STREQUAL "Emscripten" OR CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin")
+ FetchContent_Declare(protoc_binary URL ${DEP_URL_protoc_mac_universal} URL_HASH SHA1=${DEP_SHA1_protoc_mac_universal})
+ FetchContent_Populate(protoc_binary)
+ if(protoc_binary_SOURCE_DIR)
+ message("Use prebuilt protoc")
+ set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${protoc_binary_SOURCE_DIR}/bin/protoc)
+ set(PROTOC_EXECUTABLE ${ONNX_CUSTOM_PROTOC_EXECUTABLE})
+ endif()
+ endif()
+endif()
+
#Here we support two build mode:
#1. if ONNX_CUSTOM_PROTOC_EXECUTABLE is set, build Protobuf from source, except protoc.exe. This mode is mainly
# for cross-compiling
@@ -109,6 +158,19 @@ if(Patch_FOUND)
else()
set(ONNXRUNTIME_PROTOBUF_PATCH_COMMAND "")
endif()
+
+FetchContent_Declare(
+ utf8_range
+ URL ${DEP_URL_utf8_range}
+ URL_HASH SHA1=${DEP_SHA1_utf8_range}
+ FIND_PACKAGE_ARGS NAMES utf8_range
+)
+
+set(utf8_range_ENABLE_TESTS OFF CACHE BOOL "Build test suite" FORCE)
+set(utf8_range_ENABLE_INSTALL OFF CACHE BOOL "Configure installation" FORCE)
+
+
+#Protobuf depends on absl and utf8_range
FetchContent_Declare(
Protobuf
URL ${DEP_URL_protobuf}
@@ -116,7 +178,15 @@ FetchContent_Declare(
PATCH_COMMAND ${ONNXRUNTIME_PROTOBUF_PATCH_COMMAND}
FIND_PACKAGE_ARGS 3.21.12 NAMES Protobuf
)
+
set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE)
+#TODO: we'd better to turn the following option off. However, it will cause
+# ".\build.bat --config Debug --parallel --skip_submodule_sync --update" fail with an error message:
+# install(EXPORT "ONNXTargets" ...) includes target "onnx_proto" which requires target "libprotobuf-lite" that is
+# not in any export set.
+#set(protobuf_INSTALL OFF CACHE BOOL "Install protobuf binaries and files" FORCE)
+set(protobuf_USE_EXTERNAL_GTEST ON CACHE BOOL "" FORCE)
+
if (CMAKE_SYSTEM_NAME STREQUAL "Android")
set(protobuf_BUILD_PROTOC_BINARIES OFF CACHE BOOL "Build protobuf tests" FORCE)
set(protobuf_WITH_ZLIB OFF CACHE BOOL "Build with zlib support" FORCE)
@@ -132,14 +202,13 @@ set(ENABLE_DATE_TESTING OFF CACHE BOOL "" FORCE)
set(USE_SYSTEM_TZ_DB ON CACHE BOOL "" FORCE)
FetchContent_Declare(
- date
- URL ${DEP_URL_date}
- URL_HASH SHA1=${DEP_SHA1_date}
- )
+ date
+ URL ${DEP_URL_date}
+ URL_HASH SHA1=${DEP_SHA1_date}
+ FIND_PACKAGE_ARGS 3...<4 NAMES date
+)
onnxruntime_fetchcontent_makeavailable(date)
-
-
FetchContent_Declare(
mp11
URL ${DEP_URL_mp11}
@@ -174,7 +243,7 @@ if (onnxruntime_ENABLE_CPUINFO)
else()
# if xnnpack is enabled in a wasm build it needs clog from cpuinfo, but we won't internally use cpuinfo
# so we don't set CPUINFO_SUPPORTED in the CXX flags below.
- if (onnxruntime_BUILD_WEBASSEMBLY AND NOT onnxruntime_USE_XNNPACK)
+ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT onnxruntime_USE_XNNPACK)
set(CPUINFO_SUPPORTED FALSE)
else()
set(CPUINFO_SUPPORTED TRUE)
@@ -200,6 +269,20 @@ else()
set(CPUINFO_SUPPORTED FALSE)
endif()
+# xnnpack depends on clog
+# Android build should use the system's log library instead of clog
+if ((CPUINFO_SUPPORTED OR onnxruntime_USE_XNNPACK) AND NOT ANDROID)
+ set(CLOG_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+ FetchContent_Declare(
+ pytorch_clog
+ URL ${DEP_URL_pytorch_cpuinfo}
+ URL_HASH SHA1=${DEP_SHA1_pytorch_cpuinfo}
+ SOURCE_SUBDIR deps/clog
+ )
+ set(ONNXRUNTIME_CLOG_PROJ pytorch_clog)
+ set(ONNXRUNTIME_CLOG_TARGET_NAME clog)
+endif()
+
if (CPUINFO_SUPPORTED)
if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
set(IOS ON CACHE INTERNAL "")
@@ -208,7 +291,7 @@ if (CPUINFO_SUPPORTED)
# if this is a wasm build with xnnpack (only type of wasm build where cpuinfo is involved)
# we do not use cpuinfo in ORT code, so don't define CPUINFO_SUPPORTED.
- if (NOT onnxruntime_BUILD_WEBASSEMBLY)
+ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
string(APPEND CMAKE_CXX_FLAGS " -DCPUINFO_SUPPORTED")
endif()
@@ -224,7 +307,7 @@ if (CPUINFO_SUPPORTED)
URL_HASH SHA1=${DEP_SHA1_pytorch_cpuinfo}
FIND_PACKAGE_ARGS NAMES cpuinfo
)
-
+ set(ONNXRUNTIME_CPUINFO_PROJ pytorch_cpuinfo)
endif()
@@ -264,8 +347,14 @@ FetchContent_Declare(
URL_HASH SHA1=${DEP_SHA1_safeint}
)
+# use fetch content rather than makeavailable because safeint only includes unconditional test targets
+FetchContent_Populate(safeint)
# The next line will generate an error message "fatal: not a git repository", but it is ok. It is from flatbuffers
-onnxruntime_fetchcontent_makeavailable(Protobuf nlohmann_json mp11 re2 safeint GSL flatbuffers)
+onnxruntime_fetchcontent_makeavailable(utf8_range)
+# protobuf's cmake/utf8_range.cmake has the following line
+include_directories(${utf8_range_SOURCE_DIR})
+
+onnxruntime_fetchcontent_makeavailable(Protobuf nlohmann_json mp11 re2 GSL flatbuffers ${ONNXRUNTIME_CPUINFO_PROJ} ${ONNXRUNTIME_CLOG_PROJ})
if(NOT flatbuffers_FOUND)
if(NOT TARGET flatbuffers::flatbuffers)
add_library(flatbuffers::flatbuffers ALIAS flatbuffers)
@@ -361,15 +450,7 @@ FetchContent_Declare(
)
-if (CPUINFO_SUPPORTED)
- onnxruntime_fetchcontent_makeavailable(pytorch_cpuinfo)
- if (pytorch_cpuinfo_SOURCE_DIR)
- # shouldn't need to define these aliases after we use a version of cpuinfo with this commit:
- # https://github.com/pytorch/cpuinfo/commit/082deffc80ce517f81dc2f3aebe6ba671fcd09c9
- add_library(cpuinfo::cpuinfo ALIAS cpuinfo)
- add_library(cpuinfo::clog ALIAS clog)
- endif()
-endif()
+
@@ -410,7 +491,7 @@ endif()
#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto, libprotobuf, cuda/cudnn,
# dnnl/mklml, onnxruntime_codegen_tvm, tvm and pthread
# pthread is always at the last
-set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} WIL::WIL nlohmann_json::nlohmann_json onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date_interface)
+set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date ${ONNXRUNTIME_CLOG_TARGET_NAME})
# The source code of onnx_proto is generated, we must build this lib first before starting to compile the other source code that uses ONNX protobuf types.
# The other libs do not have the problem. All the sources are already there. We can compile them in any order.
set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto flatbuffers::flatbuffers)
@@ -477,6 +558,3 @@ endif()
FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)
-if (onnxruntime_USE_AZURE)
- include(triton)
-endif()
diff --git a/cmake/external/protobuf_function.cmake b/cmake/external/protobuf_function.cmake
index 81458d7123c54..82aa5a3d2dcef 100644
--- a/cmake/external/protobuf_function.cmake
+++ b/cmake/external/protobuf_function.cmake
@@ -33,6 +33,7 @@
#sed -i 's/protobuf_generate/onnxruntime_protobuf_generate/g' protobuf-config.cmake.orig
#replace 'protobuf::protoc' with ${PROTOC_EXECUTABLE} and ${PROTOC_DEPS}
#remove OUTDIR
+#add compile options to generated C++ source files to work around warnings
function(onnxruntime_protobuf_generate)
include(CMakeParseArguments)
@@ -166,6 +167,30 @@ function(onnxruntime_protobuf_generate)
endforeach()
set_source_files_properties(${_generated_srcs_all} PROPERTIES GENERATED TRUE)
+
+ if(onnxruntime_protobuf_generate_LANGUAGE STREQUAL cpp)
+ # work around warnings from protobuf generated C++ code
+ # TODO remove these if possible when upgrading protobuf. hopefully we don't need to add to them.
+
+ set(_warning_options)
+
+ if(MSVC)
+ # google\protobuf\has_bits.h(74,0): Warning C4267: 'argument': conversion from 'size_t' to 'int', possible loss of data
+ list(APPEND _warning_options "/wd4267")
+ else()
+ # TODO remove when we upgrade to a protobuf version where this is fixed (looks like it is addressed in version 22.0+)
+ # google/protobuf/parse_context.h:328:47: error: implicit conversion loses integer precision: 'long' to 'int' [-Werror,-Wshorten-64-to-32]
+ # int chunk_size = buffer_end_ + kSlopBytes - ptr;
+ if(HAS_SHORTEN_64_TO_32)
+ list(APPEND _warning_options "-Wno-error=shorten-64-to-32")
+ endif()
+ endif()
+
+ if(_warning_options)
+ set_source_files_properties(${_generated_srcs_all} PROPERTIES COMPILE_OPTIONS ${_warning_options})
+ endif()
+ endif()
+
if(onnxruntime_protobuf_generate_OUT_VAR)
set(${onnxruntime_protobuf_generate_OUT_VAR} ${_generated_srcs_all} PARENT_SCOPE)
endif()
diff --git a/cmake/external/triton.cmake b/cmake/external/triton.cmake
deleted file mode 100644
index b24768bd89afd..0000000000000
--- a/cmake/external/triton.cmake
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-include(ExternalProject)
-
-if (WIN32)
-
- function(get_vcpkg)
- ExternalProject_Add(vcpkg
- GIT_REPOSITORY https://github.com/microsoft/vcpkg.git
- GIT_TAG 2022.11.14
- PREFIX vcpkg
- SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/vcpkg-src
- BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/vcpkg-build
- CONFIGURE_COMMAND ""
- INSTALL_COMMAND ""
- UPDATE_COMMAND ""
- BUILD_COMMAND "/bootstrap-vcpkg.bat")
-
- ExternalProject_Get_Property(vcpkg SOURCE_DIR)
- set(VCPKG_SRC ${SOURCE_DIR} PARENT_SCOPE)
- set(VCPKG_DEPENDENCIES "vcpkg" PARENT_SCOPE)
- endfunction()
-
- function(vcpkg_install PACKAGE_NAME)
- add_custom_command(
- OUTPUT ${VCPKG_SRC}/packages/${PACKAGE_NAME}_${onnxruntime_target_platform}-windows/BUILD_INFO
- COMMAND ${VCPKG_SRC}/vcpkg install ${PACKAGE_NAME}:${onnxruntime_target_platform}-windows
- WORKING_DIRECTORY ${VCPKG_SRC}
- DEPENDS vcpkg)
-
- add_custom_target(get${PACKAGE_NAME}
- ALL
- DEPENDS ${VCPKG_SRC}/packages/${PACKAGE_NAME}_${onnxruntime_target_platform}-windows/BUILD_INFO)
-
- list(APPEND VCPKG_DEPENDENCIES "get${PACKAGE_NAME}")
- set(VCPKG_DEPENDENCIES ${VCPKG_DEPENDENCIES} PARENT_SCOPE)
- endfunction()
-
- get_vcpkg()
- vcpkg_install(openssl)
- vcpkg_install(openssl-windows)
- vcpkg_install(rapidjson)
- vcpkg_install(re2)
- vcpkg_install(boost-interprocess)
- vcpkg_install(boost-stacktrace)
- vcpkg_install(zlib)
- vcpkg_install(pthread)
- vcpkg_install(b64)
-
- add_dependencies(getb64 getpthread)
- add_dependencies(getpthread getzlib)
- add_dependencies(getzlib getboost-stacktrace)
- add_dependencies(getboost-stacktrace getboost-interprocess)
- add_dependencies(getboost-interprocess getre2)
- add_dependencies(getre2 getrapidjson)
- add_dependencies(getrapidjson getopenssl-windows)
- add_dependencies(getopenssl-windows getopenssl)
-
- ExternalProject_Add(triton
- GIT_REPOSITORY https://github.com/triton-inference-server/client.git
- GIT_TAG r22.12
- PREFIX triton
- SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src
- BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build
- CMAKE_ARGS -DVCPKG_TARGET_TRIPLET=${onnxruntime_target_platform}-windows -DCMAKE_TOOLCHAIN_FILE=${VCPKG_SRC}/scripts/buildsystems/vcpkg.cmake -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON
- INSTALL_COMMAND ""
- UPDATE_COMMAND "")
-
- add_dependencies(triton ${VCPKG_DEPENDENCIES})
-
-else()
-
- ExternalProject_Add(rapidjson
- GIT_REPOSITORY https://github.com/Tencent/rapidjson.git
- GIT_TAG v1.1.0
- PREFIX rapidjson
- SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/rapidjson-src
- BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/rapidjson-build
- CMAKE_ARGS -DRAPIDJSON_BUILD_TESTS=OFF -DRAPIDJSON_BUILD_DOC=OFF -DRAPIDJSON_BUILD_EXAMPLES=OFF)
-
- ExternalProject_Get_Property(rapidjson source_dir)
- set(RAPIDJSON_INCLUDE_DIR ${source_dir}/include)
- include_directories(${RAPIDJSON_INCLUDE_DIR})
-
- ExternalProject_Add(triton
- GIT_REPOSITORY https://github.com/triton-inference-server/client.git
- GIT_TAG r22.12
- PREFIX triton
- SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-src
- BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps/triton-build
- CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=binary -DTRITON_ENABLE_CC_HTTP=ON
- INSTALL_COMMAND ""
- UPDATE_COMMAND "")
-
- add_dependencies(triton rapidjson)
-
-endif() #if (WIN32)
-
-ExternalProject_Get_Property(triton SOURCE_DIR)
-set(TRITON_SRC ${SOURCE_DIR})
-
-ExternalProject_Get_Property(triton BINARY_DIR)
-set(TRITON_BIN ${BINARY_DIR}/binary)
-set(TRITON_THIRD_PARTY ${BINARY_DIR}/third-party)
\ No newline at end of file
diff --git a/cmake/external/wil.cmake b/cmake/external/wil.cmake
index d38535c4a173b..120e986ebb552 100644
--- a/cmake/external/wil.cmake
+++ b/cmake/external/wil.cmake
@@ -4,19 +4,13 @@ set(WIL_BUILD_PACKAGING OFF CACHE BOOL "" FORCE)
set(WIL_BUILD_TESTS OFF CACHE BOOL "" FORCE)
FetchContent_Declare(
- microsoft_wil
- URL ${DEP_URL_microsoft_wil}
- URL_HASH SHA1=${DEP_SHA1_microsoft_wil}
- FIND_PACKAGE_ARGS NAMES wil
+ microsoft_wil
+ URL ${DEP_URL_microsoft_wil}
+ URL_HASH SHA1=${DEP_SHA1_microsoft_wil}
+ FIND_PACKAGE_ARGS NAMES wil
)
-#We can not use FetchContent_MakeAvailable(microsoft_wil) at here, since their cmake file
-#always executes install command without conditions.
-FetchContent_Populate(microsoft_wil)
-if(NOT wil_FOUND)
- add_library(WIL INTERFACE)
- add_library(WIL::WIL ALIAS WIL)
- # The interface's include directory.
- target_include_directories(WIL INTERFACE
- $)
-endif()
\ No newline at end of file
+if(WIN32)
+ onnxruntime_fetchcontent_makeavailable(microsoft_wil)
+ set(WIL_TARGET "WIL::WIL")
+endif()
diff --git a/cmake/external/xnnpack.cmake b/cmake/external/xnnpack.cmake
index 1fc2c6ccdc9fa..7455584f1a625 100644
--- a/cmake/external/xnnpack.cmake
+++ b/cmake/external/xnnpack.cmake
@@ -35,7 +35,7 @@ set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include)
set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool)
# the XNNPACK CMake setup doesn't include the WASM kernels so we have to manually set those up
-if(onnxruntime_BUILD_WEBASSEMBLY)
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
file(READ "${XNNPACK_DIR}/BUILD.bazel" xnnpack_bazel_config)
# Replace newlines with semicolon so that it is treated as a list by CMake
diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake
index 9f34d1f46751d..59ebf8eca4306 100644
--- a/cmake/onnxruntime.cmake
+++ b/cmake/onnxruntime.cmake
@@ -18,15 +18,37 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
set(OUTPUT_STYLE xcode)
endif()
+set(ONNXRUNTIME_PUBLIC_HEADERS
+ "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_c_api.h"
+ "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_api.h"
+ "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_float16.h"
+ "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_inline.h"
+ "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h"
+ "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h"
+)
+
+if (onnxruntime_ENABLE_TRAINING_APIS)
+ list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h")
+ list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h")
+ list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h")
+endif()
+
# This macro is to get the path of header files for mobile packaging, for iOS and Android
macro(get_mobile_api_headers _HEADERS)
# include both c and cxx api
set(${_HEADERS}
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_c_api.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_api.h"
+ "${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_float16.h"
"${REPO_ROOT}/include/onnxruntime/core/session/onnxruntime_cxx_inline.h"
)
+ if (onnxruntime_ENABLE_TRAINING_APIS)
+ list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_c_api.h")
+ list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_api.h")
+ list(APPEND ${_HEADERS} "${REPO_ROOT}/orttraining/orttraining/training_api/include/onnxruntime_training_cxx_inline.h")
+ endif()
+
# need to add header files for enabled EPs
foreach(f ${ONNXRUNTIME_PROVIDER_NAMES})
file(GLOB _provider_headers CONFIGURE_DEPENDS
@@ -98,7 +120,7 @@ else()
endif()
add_dependencies(onnxruntime onnxruntime_generate_def ${onnxruntime_EXTERNAL_DEPENDENCIES})
-target_include_directories(onnxruntime PRIVATE ${ONNXRUNTIME_ROOT})
+target_include_directories(onnxruntime PRIVATE ${ONNXRUNTIME_ROOT} PUBLIC "$")
target_compile_definitions(onnxruntime PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
target_compile_definitions(onnxruntime PRIVATE VER_MINOR=${VERSION_MINOR_PART})
@@ -130,7 +152,7 @@ if (NOT WIN32)
else()
set_target_properties(onnxruntime PROPERTIES INSTALL_RPATH "@loader_path")
endif()
- elseif (NOT onnxruntime_BUILD_WEBASSEMBLY)
+ elseif (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-rpath='$ORIGIN'")
endif()
endif()
@@ -182,9 +204,9 @@ set(onnxruntime_INTERNAL_LIBRARIES
${PROVIDERS_SNPE}
${PROVIDERS_TVM}
${PROVIDERS_RKNPU}
- ${PROVIDERS_ROCM}
${PROVIDERS_VITISAI}
${PROVIDERS_XNNPACK}
+ ${PROVIDERS_WEBNN}
${PROVIDERS_AZURE}
${PROVIDERS_INTERNAL_TESTING}
${onnxruntime_winml}
@@ -206,6 +228,13 @@ if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
)
endif()
+if (onnxruntime_USE_EXTENSIONS)
+ list(APPEND onnxruntime_INTERNAL_LIBRARIES
+ onnxruntime_extensions
+ ocos_operators
+ )
+endif()
+
# If you are linking a new library, please add it to the list onnxruntime_INTERNAL_LIBRARIES or onnxruntime_EXTERNAL_LIBRARIES,
# Please do not add a library directly to the target_link_libraries command
target_link_libraries(onnxruntime PRIVATE
@@ -214,18 +243,21 @@ target_link_libraries(onnxruntime PRIVATE
)
set_property(TARGET onnxruntime APPEND_STRING PROPERTY LINK_FLAGS ${ONNXRUNTIME_SO_LINK_FLAG} ${onnxruntime_DELAYLOAD_FLAGS})
-set_target_properties(onnxruntime PROPERTIES LINK_DEPENDS ${SYMBOL_FILE})
-
-
-set_target_properties(onnxruntime PROPERTIES VERSION ${ORT_VERSION})
+set_target_properties(onnxruntime PROPERTIES
+ PUBLIC_HEADER "${ONNXRUNTIME_PUBLIC_HEADERS}"
+ LINK_DEPENDS ${SYMBOL_FILE}
+ VERSION ${ORT_VERSION}
+ FOLDER "ONNXRuntime"
+)
install(TARGETS onnxruntime
+ EXPORT ${PROJECT_NAME}Targets
+ PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
- RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+ RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
-set_target_properties(onnxruntime PROPERTIES FOLDER "ONNXRuntime")
if (WIN32 AND NOT CMAKE_CXX_STANDARD_LIBRARIES MATCHES kernel32.lib)
# Workaround STL bug https://github.com/microsoft/STL/issues/434#issuecomment-921321254
diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake
index 0410d3361cb60..43d5fa9bdee34 100644
--- a/cmake/onnxruntime_common.cmake
+++ b/cmake/onnxruntime_common.cmake
@@ -22,8 +22,8 @@ set(onnxruntime_common_src_patterns
"${ONNXRUNTIME_ROOT}/core/platform/telemetry.cc"
"${ONNXRUNTIME_ROOT}/core/platform/logging/make_platform_default_log_sink.h"
"${ONNXRUNTIME_ROOT}/core/platform/logging/make_platform_default_log_sink.cc"
- "$(ONNXRUNTIME_ROOT}/core/quantization/*.h"
- "$(ONNXRUNTIME_ROOT}/core/quantization/*.cc"
+ "${ONNXRUNTIME_ROOT}/core/quantization/*.h"
+ "${ONNXRUNTIME_ROOT}/core/quantization/*.cc"
)
if(WIN32)
@@ -86,7 +86,12 @@ endif()
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_common_src})
onnxruntime_add_static_library(onnxruntime_common ${onnxruntime_common_src})
-
+if(WIN32)
+ if("cxx_std_23" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
+ set_property(TARGET onnxruntime_common PROPERTY CXX_STANDARD 23)
+ target_compile_options(onnxruntime_common PRIVATE "/Zc:char8_t-")
+ endif()
+endif()
if (onnxruntime_USE_TELEMETRY)
set_target_properties(onnxruntime_common PROPERTIES COMPILE_FLAGS "/FI${ONNXRUNTIME_INCLUDE_DIR}/core/platform/windows/TraceLoggingConfigPrivate.h")
endif()
@@ -107,7 +112,16 @@ if(NOT onnxruntime_DISABLE_ABSEIL)
endif()
endif()
-onnxruntime_add_include_to_target(onnxruntime_common date_interface WIL::WIL)
+if (MSVC)
+ set(EIGEN_NATVIS_FILE ${eigen_SOURCE_DIR}/debug/msvc/eigen.natvis)
+ if (EXISTS ${EIGEN_NATVIS_FILE})
+ target_sources(
+ onnxruntime_common
+ INTERFACE $)
+ endif()
+endif()
+
+onnxruntime_add_include_to_target(onnxruntime_common date::date ${WIL_TARGET})
target_include_directories(onnxruntime_common
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS}
# propagate include directories of dependencies that are part of public interface
@@ -119,7 +133,6 @@ target_link_libraries(onnxruntime_common PUBLIC safeint_interface ${GSL_TARGET}
add_dependencies(onnxruntime_common ${onnxruntime_EXTERNAL_DEPENDENCIES})
-install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/common DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
set_target_properties(onnxruntime_common PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_common PROPERTIES FOLDER "ONNXRuntime")
@@ -153,7 +166,7 @@ elseif(APPLE)
if(CMAKE_OSX_ARCHITECTURES_LEN LESS_EQUAL 1)
set(X64 TRUE)
endif()
-elseif(NOT onnxruntime_BUILD_WEBASSEMBLY)
+elseif(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (CMAKE_SYSTEM_NAME STREQUAL "Android")
if (CMAKE_ANDROID_ARCH_ABI STREQUAL "armeabi-v7a")
set(ARM TRUE)
@@ -195,13 +208,14 @@ if (ARM64 OR ARM OR X86 OR X64 OR X86_64)
# Its functionality in detecting x86 cpu features are lacking, so is support for Windows.
if (CPUINFO_SUPPORTED)
onnxruntime_add_include_to_target(onnxruntime_common cpuinfo::cpuinfo)
- list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo cpuinfo::clog)
+ list(APPEND onnxruntime_EXTERNAL_LIBRARIES cpuinfo::cpuinfo ${ONNXRUNTIME_CLOG_TARGET_NAME})
endif()
endif()
endif()
if (NOT onnxruntime_BUILD_SHARED_LIB)
- install(TARGETS onnxruntime_common
+ install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/common DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
+ install(TARGETS onnxruntime_common
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
diff --git a/cmake/onnxruntime_compile_triton_kernel.cmake b/cmake/onnxruntime_compile_triton_kernel.cmake
new file mode 100644
index 0000000000000..f59cc6de108bc
--- /dev/null
+++ b/cmake/onnxruntime_compile_triton_kernel.cmake
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+find_package(Python3 COMPONENTS Interpreter REQUIRED)
+
+# set all triton kernel ops that need to be compiled
+set(triton_kernel_scripts
+ "onnxruntime/core/providers/rocm/math/softmax_triton.py"
+ "onnxruntime/contrib_ops/rocm/diffusion/group_norm_triton.py"
+)
+
+function(compile_triton_kernel out_triton_kernel_obj_file out_triton_kernel_header_dir)
+ # compile triton kernel, generate .a and .h files
+ set(triton_kernel_compiler "${REPO_ROOT}/tools/ci_build/compile_triton.py")
+ set(out_dir "${CMAKE_CURRENT_BINARY_DIR}/triton_kernels")
+ set(out_obj_file "${out_dir}/triton_kernel_infos.a")
+ set(header_file "${out_dir}/triton_kernel_infos.h")
+
+ list(TRANSFORM triton_kernel_scripts PREPEND "${REPO_ROOT}/")
+
+ add_custom_command(
+ OUTPUT ${out_obj_file} ${header_file}
+ COMMAND Python3::Interpreter ${triton_kernel_compiler}
+ --header ${header_file}
+ --script_files ${triton_kernel_scripts}
+ --obj_file ${out_obj_file}
+ DEPENDS ${triton_kernel_scripts} ${triton_kernel_compiler}
+ COMMENT "Triton compile generates: ${out_obj_file}"
+ )
+ add_custom_target(onnxruntime_triton_kernel DEPENDS ${out_obj_file} ${header_file})
+ set(${out_triton_kernel_obj_file} ${out_obj_file} PARENT_SCOPE)
+ set(${out_triton_kernel_header_dir} ${out_dir} PARENT_SCOPE)
+endfunction()
diff --git a/cmake/onnxruntime_config.h.in b/cmake/onnxruntime_config.h.in
index 44d4788acb0a4..2aef9dcf209e0 100644
--- a/cmake/onnxruntime_config.h.in
+++ b/cmake/onnxruntime_config.h.in
@@ -3,22 +3,24 @@
#pragma once
-#cmakedefine HAS_UNUSED_BUT_SET_PARAMETER
-#cmakedefine HAS_UNUSED_BUT_SET_VARIABLE
-#cmakedefine HAS_UNUSED_VARIABLE
+#cmakedefine HAS_BITWISE_INSTEAD_OF_LOGICAL
#cmakedefine HAS_CAST_FUNCTION_TYPE
-#cmakedefine HAS_PARENTHESES
-#cmakedefine HAS_USELESS_CAST
-#cmakedefine HAS_NONNULL_COMPARE
-#cmakedefine HAS_TAUTOLOGICAL_POINTER_COMPARE
#cmakedefine HAS_CATCH_VALUE
-#cmakedefine HAS_MISSING_BRACES
-#cmakedefine HAS_IGNORED_ATTRIBUTES
-#cmakedefine HAS_DEPRECATED_COPY
#cmakedefine HAS_CLASS_MEMACCESS
-#cmakedefine HAS_MAYBE_UNINITIALIZED
+#cmakedefine HAS_DEPRECATED_COPY
#cmakedefine HAS_DEPRECATED_DECLARATIONS
#cmakedefine HAS_FORMAT_TRUNCATION
-#cmakedefine HAS_BITWISE_INSTEAD_OF_LOGICAL
+#cmakedefine HAS_IGNORED_ATTRIBUTES
+#cmakedefine HAS_MAYBE_UNINITIALIZED
+#cmakedefine HAS_MISSING_BRACES
+#cmakedefine HAS_NONNULL_COMPARE
+#cmakedefine HAS_PARENTHESES
#cmakedefine HAS_REALLOCARRAY
-#cmakedefine ORT_VERSION "@ORT_VERSION@"
+#cmakedefine HAS_SHORTEN_64_TO_32
+#cmakedefine HAS_TAUTOLOGICAL_POINTER_COMPARE
+#cmakedefine HAS_UNUSED_BUT_SET_PARAMETER
+#cmakedefine HAS_UNUSED_BUT_SET_VARIABLE
+#cmakedefine HAS_UNUSED_VARIABLE
+#cmakedefine HAS_USELESS_CAST
+#cmakedefine ORT_BUILD_INFO u8"@ORT_BUILD_INFO@"
+#cmakedefine ORT_VERSION u8"@ORT_VERSION@"
diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake
index 5c947a52b7838..c9bf2ac5c3dc6 100644
--- a/cmake/onnxruntime_framework.cmake
+++ b/cmake/onnxruntime_framework.cmake
@@ -8,12 +8,31 @@ file(GLOB_RECURSE onnxruntime_framework_srcs CONFIGURE_DEPENDS
)
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-file(GLOB_RECURSE onnxruntime_training_framework_torch_srcs CONFIGURE_DEPENDS
- "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
- "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
-)
-
+ file(GLOB_RECURSE onnxruntime_training_framework_torch_srcs CONFIGURE_DEPENDS
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
+ )
list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_torch_srcs})
+ if (onnxruntime_ENABLE_TRITON)
+ file(GLOB_RECURSE onnxruntime_training_framework_triton_srcs CONFIGURE_DEPENDS
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
+ )
+ list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_triton_srcs})
+ endif()
+elseif(onnxruntime_ENABLE_TRITON)
+ # Triton executor shares some code with torch_interop, such as Python and dlpack related code files.
+ # When torch_interop is enabled, all these dependencies are already included.
+ # But if not, we need to include them explicitly.
+ file(GLOB_RECURSE onnxruntime_training_framework_triton_srcs CONFIGURE_DEPENDS
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/dlpack_python.h"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/dlpack_python.cc"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/gil.h"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/python_common.h"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
+ )
+ list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_triton_srcs})
endif()
if (onnxruntime_MINIMAL_BUILD)
@@ -37,26 +56,12 @@ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_framework_srcs})
onnxruntime_add_static_library(onnxruntime_framework ${onnxruntime_framework_srcs})
-if (onnxruntime_USE_AZURE)
-
- add_dependencies(onnxruntime_framework triton)
- target_include_directories(onnxruntime_framework PRIVATE ${TRITON_BIN}/include)
- link_directories(${TRITON_BIN}/lib ${TRITON_BIN}/lib64 ${TRITON_THIRD_PARTY}/curl/lib ${TRITON_THIRD_PARTY}/curl/lib64)
-
- if (WIN32)
-
- link_directories(${VCPKG_SRC}/installed/${onnxruntime_target_platform}-windows/lib)
- target_link_libraries(onnxruntime_framework PRIVATE libcurl httpclient_static ws2_32 crypt32 Wldap32 zlib)
-
- else()
-
- find_package(ZLIB REQUIRED)
- find_package(OpenSSL REQUIRED)
- target_link_libraries(onnxruntime_framework PRIVATE httpclient_static curl ZLIB::ZLIB OpenSSL::Crypto OpenSSL::SSL)
-
- endif() #if (WIN32)
-
-endif() #if (onnxruntime_USE_AZURE)
+if (MSVC)
+ set(ORT_FRAMEWORK_NATVIS_FILE "onnxruntime_framework.natvis")
+ target_sources(
+ onnxruntime_framework
+ INTERFACE $)
+endif()
if(onnxruntime_ENABLE_INSTRUMENT)
target_compile_definitions(onnxruntime_framework PRIVATE ONNXRUNTIME_ENABLE_INSTRUMENT)
@@ -71,7 +76,7 @@ endif()
# Needed for the provider interface, as it includes training headers when training is enabled
if (onnxruntime_ENABLE_TRAINING_OPS)
target_include_directories(onnxruntime_framework PRIVATE ${ORTTRAINING_ROOT})
- if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+ if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
onnxruntime_add_include_to_target(onnxruntime_framework Python::Module)
target_include_directories(onnxruntime_framework PRIVATE ${dlpack_SOURCE_DIR}/include)
endif()
@@ -91,7 +96,7 @@ if (onnxruntime_USE_MIMALLOC)
target_link_libraries(onnxruntime_framework mimalloc-static)
endif()
-if (onnxruntime_BUILD_WEBASSEMBLY)
+if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
target_link_libraries(onnxruntime_framework ${ABSEIL_LIBS})
endif()
@@ -103,18 +108,14 @@ add_dependencies(onnxruntime_framework ${onnxruntime_EXTERNAL_DEPENDENCIES})
# For the shared onnxruntime library, this is set in onnxruntime.cmake through CMAKE_SHARED_LINKER_FLAGS
# But our test files don't use the shared library so this must be set for them.
# For Win32 it generates an absolute path for shared providers based on the location of the executable/onnxruntime.dll
-if (UNIX AND NOT APPLE AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+if (UNIX AND NOT APPLE AND NOT onnxruntime_MINIMAL_BUILD AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath='$ORIGIN'")
endif()
if (onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB)
- find_package (SQLite3)
- if (SQLITE3_FOUND)
- include_directories(${SQLite3_INCLUDE_DIR})
- target_link_libraries (onnxruntime_framework ${SQLite3_LIBRARY})
- else()
- message( FATAL_ERROR "Could not locate SQLite3 package." )
- endif (SQLITE3_FOUND)
+ find_package (SQLite3 REQUIRED)
+ include_directories(${SQLite3_INCLUDE_DIR})
+ target_link_libraries (onnxruntime_framework ${SQLite3_LIBRARY})
target_compile_definitions(onnxruntime_framework PRIVATE DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB)
endif()
@@ -123,11 +124,10 @@ if (WIN32)
endif()
if (NOT onnxruntime_BUILD_SHARED_LIB)
- install(TARGETS onnxruntime_framework
+ install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/framework DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
+ install(TARGETS onnxruntime_framework
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
-
-install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/framework DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
diff --git a/cmake/onnxruntime_framework.natvis b/cmake/onnxruntime_framework.natvis
new file mode 100644
index 0000000000000..75328e9aac3af
--- /dev/null
+++ b/cmake/onnxruntime_framework.natvis
@@ -0,0 +1,47 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ {{val={ val }}}
+
+ - _negative()
+ - true
+ - _is_finite()
+ - _is_normal()
+ - val
+ - _exponent()
+ - _biased_exponent()
+ - _significand()
+
+
+
+
+
+
+
+
+
+
+
+
+ {{val={ val }}}
+
+ - _negative()
+ - _is_normal()
+ - true
+ - _is_finite()
+ - val
+ - _exponent()
+ - _biased_exponent()
+ - _significand()
+
+
+
\ No newline at end of file
diff --git a/cmake/onnxruntime_graph.cmake b/cmake/onnxruntime_graph.cmake
index 4785d3ed3a87f..735c86956ec4f 100644
--- a/cmake/onnxruntime_graph.cmake
+++ b/cmake/onnxruntime_graph.cmake
@@ -87,7 +87,7 @@ endif()
onnxruntime_add_static_library(onnxruntime_graph ${onnxruntime_graph_lib_src})
add_dependencies(onnxruntime_graph onnx_proto flatbuffers::flatbuffers)
-onnxruntime_add_include_to_target(onnxruntime_graph onnxruntime_common WIL::WIL onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers safeint_interface Boost::mp11)
+onnxruntime_add_include_to_target(onnxruntime_graph onnxruntime_common ${WIL_TARGET} onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers safeint_interface Boost::mp11)
if (MSVC)
set(ONNX_PROTOBUF_NATVIS_FILE "onnx_protobuf.natvis")
@@ -98,7 +98,7 @@ if (MSVC)
endif()
if(NOT MSVC)
- target_compile_options(onnxruntime_graph PRIVATE "-Wno-parentheses")
+ target_compile_options(onnxruntime_graph PRIVATE "-Wno-parentheses" "-Wno-deprecated-declarations")
endif()
if (onnxruntime_ENABLE_TRAINING)
#TODO: the graph library should focus on ONNX IR, it shouldn't depend on math libraries like MKLML/OpenBlas
@@ -118,7 +118,6 @@ endif()
set_target_properties(onnxruntime_graph PROPERTIES FOLDER "ONNXRuntime")
set_target_properties(onnxruntime_graph PROPERTIES LINKER_LANGUAGE CXX)
-install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/graph DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_graph_src} ${onnxruntime_ir_defs_src})
if (onnxruntime_ENABLE_TRAINING_OPS)
source_group(TREE ${ORTTRAINING_ROOT} FILES ${orttraining_graph_src})
@@ -148,7 +147,8 @@ if (onnxruntime_ENABLE_ATEN)
endif()
if (NOT onnxruntime_BUILD_SHARED_LIB)
- install(TARGETS onnxruntime_graph
+ install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/graph DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
+ install(TARGETS onnxruntime_graph
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
diff --git a/cmake/onnxruntime_kernel_explorer.cmake b/cmake/onnxruntime_kernel_explorer.cmake
index d4ae88a1f65df..856fed40abdba 100644
--- a/cmake/onnxruntime_kernel_explorer.cmake
+++ b/cmake/onnxruntime_kernel_explorer.cmake
@@ -19,6 +19,9 @@ elseif(onnxruntime_USE_ROCM)
if (onnxruntime_USE_COMPOSABLE_KERNEL)
include(composable_kernel)
endif()
+ if (onnxruntime_USE_HIPBLASLT)
+ find_package(hipblaslt REQUIRED)
+ endif()
set(BERT_DIR ${ONNXRUNTIME_ROOT}/contrib_ops/rocm/bert)
endif()
@@ -66,6 +69,17 @@ elseif (onnxruntime_USE_ROCM)
target_compile_definitions(kernel_explorer PRIVATE USE_COMPOSABLE_KERNEL)
target_link_libraries(kernel_explorer PRIVATE onnxruntime_composable_kernel_includes)
endif()
+ if (onnxruntime_USE_TRITON_KERNEL)
+ target_compile_definitions(kernel_explorer PRIVATE USE_TRITON_KERNEL)
+ endif()
+ if (onnxruntime_USE_HIPBLASLT)
+ target_compile_definitions(kernel_explorer PRIVATE USE_HIPBLASLT)
+ endif()
+ if (onnxruntime_USE_ROCBLAS_EXTENSION_API)
+ target_compile_definitions(kernel_explorer PRIVATE USE_ROCBLAS_EXTENSION_API)
+ target_compile_definitions(kernel_explorer PRIVATE ROCBLAS_NO_DEPRECATED_WARNINGS)
+ target_compile_definitions(kernel_explorer PRIVATE ROCBLAS_BETA_FEATURES_API)
+ endif()
endif()
add_dependencies(kernel_explorer onnxruntime_pybind11_state)
diff --git a/cmake/onnxruntime_mlas.cmake b/cmake/onnxruntime_mlas.cmake
index 6828dfd07610a..e0ccc504d7b27 100644
--- a/cmake/onnxruntime_mlas.cmake
+++ b/cmake/onnxruntime_mlas.cmake
@@ -3,28 +3,6 @@
set(MLAS_SRC_DIR ${ONNXRUNTIME_ROOT}/core/mlas/lib)
-
-set(MLAS_AMX_SUPPORTED FALSE)
-
-if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
- # match assembler version, AMX instructions are supported from 2.38
- if (CMAKE_ASM_COMPILER_ID STREQUAL "GNU")
- execute_process(
- COMMAND as --version
- OUTPUT_VARIABLE _as_version
- )
- # 2.38 or later
- if (_as_version MATCHES "GNU.[Aa]ssembler.*(2\\.38|2\\.39|2\\.[4-9][0-9]|[3-9]\\.[0-9][0-9])")
- set(MLAS_AMX_SUPPORTED TRUE)
- endif()
- endif()
-endif()
-
-if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
- set(MLAS_AMX_SUPPORTED TRUE)
-endif()
-
-
#
# All hardware agnostic source files here
# hardware specific files would cause trouble in
@@ -57,10 +35,11 @@ onnxruntime_add_static_library(onnxruntime_mlas
${MLAS_SRC_DIR}/qdwconv_kernelsize.cpp
)
-if(MLAS_AMX_SUPPORTED)
- target_compile_definitions(onnxruntime_mlas PRIVATE MLAS_AMX_SUPPORTED)
-else()
- message(WARNING "AMX instructions NOT supported due to lack of compiler tool chain!")
+if (NOT onnxruntime_ORT_MINIMAL_BUILD)
+ target_sources(onnxruntime_mlas PRIVATE
+ ${MLAS_SRC_DIR}/q4_dq.cpp
+ ${MLAS_SRC_DIR}/q4gemm.cpp
+ )
endif()
set(ONNXRUNTIME_MLAS_LIBS onnxruntime_mlas)
@@ -153,6 +132,10 @@ function(setup_mlas_source_for_windows)
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/arm/sgemmc.cpp
)
+ # it should be removed after Visual Stuio is upgraded to 17.7
+ if (MSVC)
+ add_compile_options("-d2SSAOptimizer-")
+ endif()
elseif(onnxruntime_target_platform STREQUAL "x64")
file(GLOB_RECURSE mlas_platform_srcs_avx CONFIGURE_DEPENDS
@@ -210,6 +193,12 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/amd64/TanhKernelFma3.asm
${MLAS_SRC_DIR}/amd64/ErfKernelFma3.asm
)
+ if (NOT onnxruntime_ORT_MINIMAL_BUILD)
+ target_sources(onnxruntime_mlas PRIVATE
+ ${MLAS_SRC_DIR}/q4gemm_avx512.cpp
+ )
+ endif()
+
else()
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp
@@ -220,7 +209,7 @@ function(setup_mlas_source_for_windows)
endif()
endfunction()
-if (onnxruntime_BUILD_WEBASSEMBLY)
+if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
file(GLOB_RECURSE mlas_platform_srcs
"${MLAS_SRC_DIR}/wasm_simd/*.cpp"
@@ -550,15 +539,23 @@ else()
${mlas_platform_srcs_avx512core}
)
- if(MLAS_AMX_SUPPORTED)
+ if (NOT onnxruntime_ORT_MINIMAL_BUILD)
set(mlas_platform_srcs
${mlas_platform_srcs}
- ${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
- ${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S
+ ${MLAS_SRC_DIR}/q4gemm_avx512.cpp
)
- set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mamx-tile -mamx-int8 -mavx2 -mavx512bw -mavx512dq -mavx512vl")
- set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mamx-tile -mamx-int8 -mavx2 -mavx512bw -mavx512dq -mavx512vl")
+ set_source_files_properties(${MLAS_SRC_DIR}/q4gemm_avx512.cpp PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl -mavx512f")
endif()
+ if(NOT APPLE)
+ set(mlas_platform_srcs
+ ${mlas_platform_srcs}
+ ${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmxCommon.S
+ ${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp
+ ${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S
+ )
+ set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
+ set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
+ endif()
if(ONNXRUNTIME_MLAS_MULTI_ARCH)
onnxruntime_add_static_library(onnxruntime_mlas_x86_64 ${mlas_platform_srcs})
@@ -584,7 +581,7 @@ set_target_properties(onnxruntime_mlas PROPERTIES FOLDER "ONNXRuntime")
if (WIN32)
target_compile_options(onnxruntime_mlas PRIVATE "$<$:/wd6385>" "$<$:/wd4127>")
if (onnxruntime_ENABLE_STATIC_ANALYSIS)
- target_compile_options(onnxruntime_mlas PRIVATE "$<$:/analyze:stacksize 131072">)
+ target_compile_options(onnxruntime_mlas PRIVATE "$<$:/analyze:stacksize" 131072>)
endif()
endif()
@@ -595,3 +592,46 @@ if (NOT onnxruntime_BUILD_SHARED_LIB)
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
+
+
+if (NOT onnxruntime_ORT_MINIMAL_BUILD)
+
+ #
+ # Command line tool for quantization and de-quantization of 2-D fp32 tensors
+ # based on block-wise quantization of int4
+ #
+
+ onnxruntime_add_executable(onnxruntime_mlas_q4dq
+ ${MLAS_SRC_DIR}/q4_dq_cli.cpp
+ )
+ target_include_directories(onnxruntime_mlas_q4dq PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${MLAS_SRC_DIR})
+ set_target_properties(onnxruntime_mlas_q4dq PROPERTIES FOLDER "ONNXRuntimeTest")
+
+ target_link_libraries(onnxruntime_mlas_q4dq PRIVATE ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)
+ if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ target_link_libraries(onnxruntime_mlas_q4dq PRIVATE cpuinfo)
+ endif()
+ if(NOT WIN32)
+ target_link_libraries(onnxruntime_mlas_q4dq PRIVATE nsync::nsync_cpp ${CMAKE_DL_LIBS})
+ endif()
+ if (CMAKE_SYSTEM_NAME STREQUAL "Android")
+ target_link_libraries(onnxruntime_mlas_q4dq PRIVATE ${android_shared_libs})
+ endif()
+
+ if(WIN32)
+ target_link_libraries(onnxruntime_mlas_q4dq PRIVATE debug Dbghelp Advapi32)
+ endif()
+ if (onnxruntime_LINK_LIBATOMIC)
+ target_link_libraries(onnxruntime_mlas_q4dq PRIVATE atomic)
+ endif()
+ target_link_libraries(onnxruntime_mlas_q4dq PRIVATE Threads::Threads)
+
+ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+ set_target_properties(onnxruntime_mlas_q4dq PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
+ else()
+ set_target_properties(onnxruntime_mlas_q4dq PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1")
+ endif()
+ endif()
+
+endif()
diff --git a/cmake/onnxruntime_nodejs.cmake b/cmake/onnxruntime_nodejs.cmake
index 7b4ad950dc50a..6053b9d1088cd 100644
--- a/cmake/onnxruntime_nodejs.cmake
+++ b/cmake/onnxruntime_nodejs.cmake
@@ -60,6 +60,20 @@ else()
endif()
endif()
+# setup providers
+if (onnxruntime_USE_CUDA)
+ set(NODEJS_BINDING_USE_CUDA "--use_cuda")
+endif()
+if (onnxruntime_USE_DML)
+ set(NODEJS_BINDING_USE_DML "--use_dml")
+endif()
+if (onnxruntime_USE_TENSORRT)
+ set(NODEJS_BINDING_USE_TENSORRT "--use_tensorrt")
+endif()
+if (onnxruntime_USE_COREML)
+ set(NODEJS_BINDING_USE_COREML "--use_coreml")
+endif()
+
if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS)
# add custom target
add_custom_target(js_npm_ci ALL
@@ -74,7 +88,9 @@ add_custom_target(js_common_npm_ci ALL
add_custom_target(nodejs_binding_wrapper ALL
COMMAND ${NPM_CLI} ci
- COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --arch=${NODEJS_BINDING_ARCH}
+ COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE}
+ --arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT}
+ ${NODEJS_BINDING_USE_COREML}
WORKING_DIRECTORY ${JS_NODE_ROOT}
COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")
diff --git a/cmake/onnxruntime_objectivec.cmake b/cmake/onnxruntime_objectivec.cmake
index 10516077319d1..4be2f51a96ebc 100644
--- a/cmake/onnxruntime_objectivec.cmake
+++ b/cmake/onnxruntime_objectivec.cmake
@@ -40,6 +40,19 @@ file(GLOB onnxruntime_objc_srcs CONFIGURE_DEPENDS
"${OBJC_ROOT}/*.m"
"${OBJC_ROOT}/*.mm")
+if(NOT onnxruntime_ENABLE_TRAINING_APIS)
+ list(REMOVE_ITEM onnxruntime_objc_headers
+ "${OBJC_ROOT}/include/ort_checkpoint.h"
+ "${OBJC_ROOT}/include/ort_training_session.h")
+
+ list(REMOVE_ITEM onnxruntime_objc_srcs
+ "${OBJC_ROOT}/ort_checkpoint_internal.h"
+ "${OBJC_ROOT}/ort_checkpoint.mm"
+ "${OBJC_ROOT}/ort_training_session_internal.h"
+ "${OBJC_ROOT}/ort_training_session.mm")
+endif()
+
+
source_group(TREE "${OBJC_ROOT}" FILES
${onnxruntime_objc_headers}
${onnxruntime_objc_srcs})
@@ -61,6 +74,13 @@ if(onnxruntime_USE_COREML)
"${ONNXRUNTIME_INCLUDE_DIR}/core/providers/coreml")
endif()
+if (onnxruntime_ENABLE_TRAINING_APIS)
+ target_include_directories(onnxruntime_objc
+ PRIVATE
+ "${ORTTRAINING_SOURCE_DIR}/training_api/include/")
+
+endif()
+
find_library(FOUNDATION_LIB Foundation REQUIRED)
target_link_libraries(onnxruntime_objc
@@ -105,6 +125,14 @@ if(onnxruntime_BUILD_UNIT_TESTS)
"${OBJC_ROOT}/test/*.m"
"${OBJC_ROOT}/test/*.mm")
+ if(NOT onnxruntime_ENABLE_TRAINING_APIS)
+ list(REMOVE_ITEM onnxruntime_objc_test_srcs
+ "${OBJC_ROOT}/test/ort_checkpoint_test.mm"
+ "${OBJC_ROOT}/test/ort_training_session_test.mm"
+ "${OBJC_ROOT}/test/ort_training_utils_test.mm")
+
+ endif()
+
source_group(TREE "${OBJC_ROOT}" FILES ${onnxruntime_objc_test_srcs})
xctest_add_bundle(onnxruntime_objc_test onnxruntime_objc
@@ -124,6 +152,7 @@ if(onnxruntime_BUILD_UNIT_TESTS)
add_custom_command(TARGET onnxruntime_objc_test POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
"${OBJC_ROOT}/test/testdata"
+ "${ONNXRUNTIME_ROOT}/test/testdata/training_api"
"$/Resources")
xctest_add_test(XCTest.onnxruntime_objc_test onnxruntime_objc_test)
diff --git a/cmake/onnxruntime_optimizer.cmake b/cmake/onnxruntime_optimizer.cmake
index 84e64b634a38e..3da4198573d54 100644
--- a/cmake/onnxruntime_optimizer.cmake
+++ b/cmake/onnxruntime_optimizer.cmake
@@ -42,12 +42,18 @@ if (onnxruntime_MINIMAL_BUILD)
"${ONNXRUNTIME_ROOT}/core/optimizer/selectors_actions/selector_action_transformer_apply_contexts.h"
"${ONNXRUNTIME_ROOT}/core/optimizer/selectors_actions/selector_action_transformer.cc"
"${ONNXRUNTIME_ROOT}/core/optimizer/selectors_actions/selector_action_transformer.h"
- "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimizer/optimizer_api_impl.cc"
- "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimizer/optimizer_api.h"
- "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimizer/optimizer_utils.h"
- "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimizer/ort_transpose_optimizer.cc"
- "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimizer/ort_transpose_optimizer.h"
- "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimizer/transpose_optimizer.cc"
+ # files required for layout transformation
+ "${ONNXRUNTIME_ROOT}/core/optimizer/layout_transformation/layout_transformation.h"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/layout_transformation/layout_transformation.cc"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/layout_transformation/layout_transformation_potentially_added_ops.h"
+ # files required for transpose optimization post-layout transformation
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/optimizer_api.h"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/onnx_transpose_optimization.h"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/ort_optimizer_utils.h"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/ort_transpose_optimization.h"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/ort_transpose_optimization.cc"
"${ONNXRUNTIME_ROOT}/core/optimizer/utils.cc"
"${ONNXRUNTIME_ROOT}/core/optimizer/utils.h"
)
@@ -59,6 +65,8 @@ else()
"${ONNXRUNTIME_ROOT}/core/optimizer/*.cc"
"${ONNXRUNTIME_ROOT}/core/optimizer/compute_optimizer/*.h"
"${ONNXRUNTIME_ROOT}/core/optimizer/compute_optimizer/*.cc"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/layout_transformation/*.h"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/layout_transformation/*.cc"
"${ONNXRUNTIME_ROOT}/core/optimizer/qdq_transformer/*.h"
"${ONNXRUNTIME_ROOT}/core/optimizer/qdq_transformer/*.cc"
"${ONNXRUNTIME_ROOT}/core/optimizer/qdq_transformer/selectors_actions/*.h"
@@ -67,14 +75,12 @@ else()
"${ONNXRUNTIME_ROOT}/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc"
"${ONNXRUNTIME_ROOT}/core/optimizer/selectors_actions/*.h"
"${ONNXRUNTIME_ROOT}/core/optimizer/selectors_actions/*.cc"
- "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimizer/*.h"
- "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimizer/*.cc"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/*.h"
+ "${ONNXRUNTIME_ROOT}/core/optimizer/transpose_optimization/*.cc"
)
endif()
-if (onnxruntime_ENABLE_TRAINING_APIS)
- # we need optimizers for both full build as well as training api only build.
- # Using onnxruntime_ENABLE_TRAINING_APIS since it is always ON in a full training build.
+if (onnxruntime_ENABLE_TRAINING)
list(APPEND onnxruntime_optimizer_src_patterns
"${ORTTRAINING_SOURCE_DIR}/core/optimizer/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/optimizer/*.cc"
@@ -99,17 +105,21 @@ endif()
onnxruntime_add_static_library(onnxruntime_optimizer ${onnxruntime_optimizer_srcs})
-install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/optimizer DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
onnxruntime_add_include_to_target(onnxruntime_optimizer onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
target_include_directories(onnxruntime_optimizer PRIVATE ${ONNXRUNTIME_ROOT})
-if (onnxruntime_ENABLE_TRAINING_APIS)
+if (onnxruntime_ENABLE_TRAINING)
target_include_directories(onnxruntime_optimizer PRIVATE ${ORTTRAINING_ROOT})
endif()
+if (onnxruntime_ENABLE_TRITON)
+ target_link_libraries(onnxruntime_optimizer PRIVATE nlohmann_json::nlohmann_json)
+ onnxruntime_add_include_to_target(onnxruntime_optimizer Python::Module)
+endif()
add_dependencies(onnxruntime_optimizer ${onnxruntime_EXTERNAL_DEPENDENCIES})
set_target_properties(onnxruntime_optimizer PROPERTIES FOLDER "ONNXRuntime")
if (NOT onnxruntime_BUILD_SHARED_LIB)
- install(TARGETS onnxruntime_optimizer
+ install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/optimizer DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
+ install(TARGETS onnxruntime_optimizer
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
index c253b6b9c7197..b9e7873132089 100644
--- a/cmake/onnxruntime_providers.cmake
+++ b/cmake/onnxruntime_providers.cmake
@@ -93,6 +93,11 @@ file(GLOB_RECURSE onnxruntime_rocm_contrib_ops_cu_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/contrib_ops/rocm/*.cuh"
)
+file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS
+ "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.h"
+ "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc"
+)
+
file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/*.cc"
@@ -114,7 +119,7 @@ endif()
if(onnxruntime_USE_NNAPI_BUILTIN)
set(PROVIDERS_NNAPI onnxruntime_providers_nnapi)
endif()
-if(onnxruntime_USE_JS)
+if(onnxruntime_USE_JSEP)
set(PROVIDERS_JS onnxruntime_providers_js)
endif()
if(onnxruntime_USE_QNN)
@@ -147,6 +152,9 @@ endif()
if (onnxruntime_USE_XNNPACK)
set(PROVIDERS_XNNPACK onnxruntime_providers_xnnpack)
endif()
+if(onnxruntime_USE_WEBNN)
+ set(PROVIDERS_WEBNN onnxruntime_providers_webnn)
+endif()
if(onnxruntime_USE_SNPE)
include(onnxruntime_snpe_provider.cmake)
endif()
@@ -201,6 +209,8 @@ if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
"${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/tensorboard/*.h"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/torch/*.cc"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/torch/*.h"
+ "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/triton/triton_op.cc"
+ "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/triton/triton_op.h"
)
list(REMOVE_ITEM onnxruntime_providers_src ${onnxruntime_cpu_full_training_only_srcs})
@@ -230,6 +240,8 @@ if (onnxruntime_ENABLE_TRAINING)
file(GLOB_RECURSE onnxruntime_training_framework_excude_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
)
list(REMOVE_ITEM onnxruntime_cpu_training_ops_srcs ${onnxruntime_training_framework_excude_srcs})
@@ -300,7 +312,7 @@ endif()
if (onnxruntime_ENABLE_TRAINING)
add_dependencies(onnxruntime_providers tensorboard)
onnxruntime_add_include_to_target(onnxruntime_providers tensorboard)
- if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+ if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
onnxruntime_add_include_to_target(onnxruntime_providers Python::Module)
endif()
@@ -309,14 +321,14 @@ if (onnxruntime_ENABLE_TRAINING)
endif()
endif()
-install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
+install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu/cpu_provider_factory.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
set_target_properties(onnxruntime_providers PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime")
if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS"
- AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Android")
- AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+ AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android"
+ AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
file(GLOB onnxruntime_providers_shared_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/shared/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/shared/*.cc"
@@ -354,7 +366,7 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
install(TARGETS onnxruntime_providers_shared
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
- RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+ RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
endif()
@@ -424,12 +436,12 @@ if (onnxruntime_USE_CUDA)
"${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/wait.cc"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/wait.h"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/yield.cc"
- "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/controlflow/yield.h"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.cc"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.h"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/gist/*.cu"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/torch/*.cc"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/torch/*.h"
+ "${ORTTRAINING_SOURCE_DIR}/training_ops/cuda/triton/triton_op.cc"
)
list(REMOVE_ITEM onnxruntime_providers_cuda_src ${onnxruntime_cuda_full_training_only_srcs})
@@ -448,125 +460,148 @@ if (onnxruntime_USE_CUDA)
if (onnxruntime_REDUCED_OPS_BUILD)
substitute_op_reduction_srcs(onnxruntime_providers_cuda_src)
endif()
- onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${onnxruntime_providers_cuda_src})
- if (onnxruntime_REDUCED_OPS_BUILD)
- add_op_reduction_include_dirs(onnxruntime_providers_cuda)
- endif()
-
- #target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler \"/analyze:stacksize 131072\">")
- if (HAS_GUARD_CF)
- target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler /guard:cf>")
- endif()
- if (HAS_QSPECTRE)
- target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler /Qspectre>")
- endif()
- foreach(ORT_FLAG ${ORT_WARNING_FLAGS})
- target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler \"${ORT_FLAG}\">")
- endforeach()
- # CUDA 11.3+ supports parallel compilation
- # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-guiding-compiler-driver-threads
- if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3)
- target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:--threads \"${onnxruntime_NVCC_THREADS}\">")
- endif()
- if (UNIX)
- target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler -Wno-reorder>"
- "$<$>:-Wno-reorder>")
- target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler -Wno-error=sign-compare>"
- "$<$>:-Wno-error=sign-compare>")
- else()
- #mutex.cuh(91): warning C4834: discarding return value of function with 'nodiscard' attribute
- target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler /wd4834>")
- target_compile_options(onnxruntime_providers_cuda PRIVATE "$<$:SHELL:-Xcompiler /wd4127>")
- endif()
+ # cuda_provider_interface.cc is removed from the object target: onnxruntime_providers_cuda_obj and
+ # add to the lib onnxruntime_providers_cuda separatedly.
+ # onnxruntime_providers_cuda_ut can share all the object files with onnxruntime_providers_cuda except cuda_provider_interface.cc.
+ set(cuda_provider_interface_src ${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_provider_interface.cc)
+ list(REMOVE_ITEM onnxruntime_providers_cuda_src ${cuda_provider_interface_src})
+ onnxruntime_add_object_library(onnxruntime_providers_cuda_obj ${onnxruntime_providers_cuda_src})
+ onnxruntime_add_shared_library_module(onnxruntime_providers_cuda ${cuda_provider_interface_src} $)
+ # config_cuda_provider_shared_module can be used to config onnxruntime_providers_cuda_obj, onnxruntime_providers_cuda & onnxruntime_providers_cuda_ut.
+ # This function guarantees that all 3 targets have the same configurations.
+ function(config_cuda_provider_shared_module target)
+ if (onnxruntime_REDUCED_OPS_BUILD)
+ add_op_reduction_include_dirs(${target})
+ endif()
- onnxruntime_add_include_to_target(onnxruntime_providers_cuda onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers)
- if (onnxruntime_ENABLE_TRAINING_OPS)
- onnxruntime_add_include_to_target(onnxruntime_providers_cuda onnxruntime_training)
- if (onnxruntime_ENABLE_TRAINING)
- target_link_libraries(onnxruntime_providers_cuda PRIVATE onnxruntime_training)
+ if (HAS_GUARD_CF)
+ target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler /guard:cf>")
endif()
- if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
- onnxruntime_add_include_to_target(onnxruntime_providers_cuda Python::Module)
+ if (HAS_QSPECTRE)
+ target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler /Qspectre>")
+ endif()
+ foreach(ORT_FLAG ${ORT_WARNING_FLAGS})
+ target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler \"${ORT_FLAG}\">")
+ endforeach()
+ # CUDA 11.3+ supports parallel compilation
+ # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-guiding-compiler-driver-threads
+ if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.3)
+ target_compile_options(${target} PRIVATE "$<$:SHELL:--threads \"${onnxruntime_NVCC_THREADS}\">")
+ endif()
+ if (UNIX)
+ target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler -Wno-reorder>"
+ "$<$>:-Wno-reorder>")
+ target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler -Wno-error=sign-compare>"
+ "$<$>:-Wno-error=sign-compare>")
+ else()
+ #mutex.cuh(91): warning C4834: discarding return value of function with 'nodiscard' attribute
+ target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler /wd4834>")
+ target_compile_options(${target} PRIVATE "$<$:SHELL:-Xcompiler /wd4127>")
endif()
- endif()
- add_dependencies(onnxruntime_providers_cuda onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
- target_link_libraries(onnxruntime_providers_cuda PRIVATE cublasLt cublas cudnn curand cufft ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface)
- if(onnxruntime_CUDNN_HOME)
- target_include_directories(onnxruntime_providers_cuda PRIVATE ${onnxruntime_CUDNN_HOME}/include)
- target_link_directories(onnxruntime_providers_cuda PRIVATE ${onnxruntime_CUDNN_HOME}/lib)
- endif()
+ onnxruntime_add_include_to_target(${target} onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers)
+ if (onnxruntime_ENABLE_TRAINING_OPS)
+ onnxruntime_add_include_to_target(${target} onnxruntime_training)
+ if (onnxruntime_ENABLE_TRAINING)
+ target_link_libraries(${target} PRIVATE onnxruntime_training)
+ endif()
+ if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
+ onnxruntime_add_include_to_target(${target} Python::Module)
+ endif()
+ endif()
- if (onnxruntime_USE_FLASH_ATTENTION)
- include(cutlass)
- target_include_directories(onnxruntime_providers_cuda PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples)
- endif()
+ add_dependencies(${target} onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
+ target_link_libraries(${target} PRIVATE cublasLt cublas cudnn curand cufft ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface)
+ if(onnxruntime_CUDNN_HOME)
+ target_include_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/include)
+ target_link_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/lib)
+ endif()
- target_include_directories(onnxruntime_providers_cuda PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES} PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
- # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
- set_target_properties(onnxruntime_providers_cuda PROPERTIES LINKER_LANGUAGE CUDA)
- set_target_properties(onnxruntime_providers_cuda PROPERTIES FOLDER "ONNXRuntime")
+ if (onnxruntime_USE_TRITON_KERNEL)
+ # compile triton kernel, generate .a and .h files
+ include(onnxruntime_compile_triton_kernel.cmake)
+ compile_triton_kernel(triton_kernel_obj_file triton_kernel_header_dir)
+ add_dependencies(${target} onnxruntime_triton_kernel)
+ target_compile_definitions(${target} PRIVATE USE_TRITON_KERNEL)
+ target_include_directories(${target} PRIVATE ${triton_kernel_header_dir})
+ target_link_libraries(${target} PUBLIC -Wl,--whole-archive ${triton_kernel_obj_file} -Wl,--no-whole-archive)
+ # lib cuda needed by cuLaunchKernel
+ target_link_libraries(${target} PRIVATE cuda)
+ endif()
- if (onnxruntime_ENABLE_CUDA_PROFILING) # configure cupti for cuda profiling
- target_include_directories(onnxruntime_providers_cuda PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/include)
- target_link_directories(onnxruntime_providers_cuda PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/lib64)
- target_link_libraries(onnxruntime_providers_cuda PRIVATE cupti)
- endif()
+ if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
+ include(cutlass)
+ target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples)
+ endif()
- if (onnxruntime_ENABLE_NVTX_PROFILE)
- target_link_libraries(onnxruntime_providers_cuda PRIVATE nvToolsExt)
- endif()
+ target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES} PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+ # ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
+ set_target_properties(${target} PROPERTIES LINKER_LANGUAGE CUDA)
+ set_target_properties(${target} PROPERTIES FOLDER "ONNXRuntime")
- if (onnxruntime_ENABLE_TRAINING_OPS)
- target_include_directories(onnxruntime_providers_cuda PRIVATE ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS})
- endif()
+ if (onnxruntime_ENABLE_CUDA_PROFILING) # configure cupti for cuda profiling
+ target_include_directories(${target} PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/include)
+ target_link_directories(${target} PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/lib64)
+ target_link_libraries(${target} PRIVATE cupti)
+ endif()
- if(onnxruntime_USE_MPI)
- target_link_libraries(onnxruntime_providers_cuda PRIVATE ${MPI_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
- endif()
+ if (onnxruntime_ENABLE_NVTX_PROFILE AND NOT WIN32)
+ target_link_libraries(${target} PRIVATE nvToolsExt)
+ endif()
- if (onnxruntime_USE_NCCL)
- target_include_directories(onnxruntime_providers_cuda PRIVATE ${NCCL_INCLUDE_DIRS})
- target_link_libraries(onnxruntime_providers_cuda PRIVATE ${NCCL_LIBRARIES})
- endif()
+ if (onnxruntime_ENABLE_TRAINING_OPS)
+ target_include_directories(${target} PRIVATE ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS})
+ endif()
- if (WIN32)
- # *.cu cannot use PCH
- if (NOT onnxruntime_BUILD_CACHE)
- target_precompile_headers(onnxruntime_providers_cuda PUBLIC
- "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h"
- "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.cc"
- )
+ if(onnxruntime_USE_MPI)
+ target_link_libraries(${target} PRIVATE ${MPI_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
endif()
- # minimize the Windows includes.
- # this avoids an issue with CUDA 11.6 where 'small' is defined in the windows and cuda headers.
- target_compile_definitions(onnxruntime_providers_cuda PRIVATE "WIN32_LEAN_AND_MEAN")
+ if (onnxruntime_USE_NCCL)
+ target_include_directories(${target} PRIVATE ${NCCL_INCLUDE_DIRS})
+ target_link_libraries(${target} PRIVATE ${NCCL_LIBRARIES})
+ endif()
- # disable a warning from the CUDA headers about unreferenced local functions
- #target_compile_options(onnxruntime_providers_cuda PRIVATE /wd4505)
- set(onnxruntime_providers_cuda_static_library_flags
- -IGNORE:4221 # LNK4221: This object file does not define any previously undefined public symbols, so it will not be used by any link operation that consumes this library
- )
- set_target_properties(onnxruntime_providers_cuda PROPERTIES
- STATIC_LIBRARY_FLAGS "${onnxruntime_providers_cuda_static_library_flags}")
- endif()
+ if (WIN32)
+ # *.cu cannot use PCH
+ if (NOT onnxruntime_BUILD_CACHE)
+ target_precompile_headers(${target} PUBLIC
+ "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.h"
+ "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_pch.cc"
+ )
+ endif()
- if(APPLE)
- set_property(TARGET onnxruntime_providers_cuda APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/cuda/exported_symbols.lst")
- target_link_libraries(onnxruntime_providers_cuda PRIVATE nsync::nsync_cpp)
- elseif(UNIX)
- set_property(TARGET onnxruntime_providers_cuda APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/cuda/version_script.lds -Xlinker --gc-sections")
- target_link_libraries(onnxruntime_providers_cuda PRIVATE nsync::nsync_cpp)
- elseif(WIN32)
- set_property(TARGET onnxruntime_providers_cuda APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/cuda/symbols.def")
- else()
- message(FATAL_ERROR "onnxruntime_providers_cuda unknown platform, need to specify shared library exports for it")
- endif()
+ # minimize the Windows includes.
+ # this avoids an issue with CUDA 11.6 where 'small' is defined in the windows and cuda headers.
+ target_compile_definitions(${target} PRIVATE "WIN32_LEAN_AND_MEAN")
- if (onnxruntime_ENABLE_ATEN)
- target_compile_definitions(onnxruntime_providers_cuda PRIVATE ENABLE_ATEN)
- endif()
+ # disable a warning from the CUDA headers about unreferenced local functions
+ #target_compile_options(${target} PRIVATE /wd4505)
+ set(onnxruntime_providers_cuda_static_library_flags
+ -IGNORE:4221 # LNK4221: This object file does not define any previously undefined public symbols, so it will not be used by any link operation that consumes this library
+ )
+ set_target_properties(${target} PROPERTIES
+ STATIC_LIBRARY_FLAGS "${onnxruntime_providers_cuda_static_library_flags}")
+ endif()
+
+ if(APPLE)
+ set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/cuda/exported_symbols.lst")
+ target_link_libraries(${target} PRIVATE nsync::nsync_cpp)
+ elseif(UNIX)
+ set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/cuda/version_script.lds -Xlinker --gc-sections")
+ target_link_libraries(${target} PRIVATE nsync::nsync_cpp)
+ elseif(WIN32)
+ set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/cuda/symbols.def")
+ else()
+ message(FATAL_ERROR "${target} unknown platform, need to specify shared library exports for it")
+ endif()
+
+ if (onnxruntime_ENABLE_ATEN)
+ target_compile_definitions(${target} PRIVATE ENABLE_ATEN)
+ endif()
+ endfunction()
+ config_cuda_provider_shared_module(onnxruntime_providers_cuda_obj)
+ config_cuda_provider_shared_module(onnxruntime_providers_cuda)
install(TARGETS onnxruntime_providers_cuda
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
@@ -594,8 +629,9 @@ if (onnxruntime_USE_DNNL)
add_dependencies(onnxruntime_providers_dnnl onnxruntime_providers_shared project_dnnl ${onnxruntime_EXTERNAL_DEPENDENCIES})
target_include_directories(onnxruntime_providers_dnnl PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${DNNL_INCLUDE_DIR} ${DNNL_OCL_INCLUDE_DIR})
# ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
- target_link_libraries(onnxruntime_providers_dnnl PRIVATE dnnl ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${ABSEIL_LIBS} ${GSL_TARGET})
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dnnl DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
+ target_link_libraries(onnxruntime_providers_dnnl PRIVATE dnnl ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${ABSEIL_LIBS} ${GSL_TARGET} safeint_interface)
+ install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dnnl/dnnl_provider_options.h
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
set_target_properties(onnxruntime_providers_dnnl PROPERTIES FOLDER "ONNXRuntime")
set_target_properties(onnxruntime_providers_dnnl PROPERTIES LINKER_LANGUAGE CXX)
@@ -658,6 +694,13 @@ if (onnxruntime_USE_TENSORRT)
endif()
set(CXX_VERSION_DEFINED TRUE)
+ # There is an issue when running "Debug build" TRT EP with "Release build" TRT builtin parser on Windows.
+ # We enforce following workaround for now until the real fix.
+ if (WIN32 AND CMAKE_BUILD_TYPE STREQUAL "Debug")
+ set(onnxruntime_USE_TENSORRT_BUILTIN_PARSER OFF)
+ MESSAGE(STATUS "[Note] There is an issue when running \"Debug build\" TRT EP with \"Release build\" TRT built-in parser on Windows. This build will use tensorrt oss parser instead.")
+ endif()
+
if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
# Add TensorRT library
find_path(TENSORRT_INCLUDE_DIR NvInfer.h
@@ -686,6 +729,7 @@ if (onnxruntime_USE_TENSORRT)
onnxruntime_fetchcontent_makeavailable(onnx_tensorrt)
include_directories(${onnx_tensorrt_SOURCE_DIR})
set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
+ set(CUDA_INCLUDE_DIR ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # onnx-tensorrt repo needs this variable to build
if ( CMAKE_COMPILER_IS_GNUCC )
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
endif()
@@ -714,6 +758,8 @@ if (onnxruntime_USE_TENSORRT)
"${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
"${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_stream_handle.h"
"${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_stream_handle.cc"
+ "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_graph.h"
+ "${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_graph.cc"
)
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_tensorrt_cc_srcs})
@@ -731,7 +777,7 @@ if (onnxruntime_USE_TENSORRT)
endif()
# ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/tensorrt DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
+ set_target_properties(onnxruntime_providers_tensorrt PROPERTIES PUBLIC_HEADER ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h)
set_target_properties(onnxruntime_providers_tensorrt PROPERTIES LINKER_LANGUAGE CUDA)
set_target_properties(onnxruntime_providers_tensorrt PROPERTIES FOLDER "ONNXRuntime")
target_compile_definitions(onnxruntime_providers_tensorrt PRIVATE ONNXIFI_BUILD_LIBRARY=1)
@@ -762,28 +808,53 @@ if (onnxruntime_USE_TENSORRT)
endif()
install(TARGETS onnxruntime_providers_tensorrt
+ PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
- RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+ RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
if (onnxruntime_USE_VITISAI)
- file(GLOB_RECURSE onnxruntime_providers_vitisai_cc_srcs CONFIGURE_DEPENDS
- "${ONNXRUNTIME_ROOT}/core/providers/vitisai/*.h"
+ if ("${GIT_COMMIT_ID}" STREQUAL "")
+ execute_process(
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND git rev-parse HEAD
+ OUTPUT_VARIABLE GIT_COMMIT_ID
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ endif()
+ configure_file(${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/version_info.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/VitisAI/version_info.h)
+ file(GLOB onnxruntime_providers_vitisai_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/vitisai/*.cc"
+ "${ONNXRUNTIME_ROOT}/core/providers/vitisai/*.h"
+ "${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/*.cc"
+ "${ONNXRUNTIME_ROOT}/core/providers/vitisai/imp/*.h"
)
-
+ list(REMOVE_ITEM onnxruntime_providers_vitisai_cc_srcs "${ONNXRUNTIME_ROOT}/core/providers/vitisai/onnxruntime_vitisai_ep_stub.cc")
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_vitisai_cc_srcs})
onnxruntime_add_static_library(onnxruntime_providers_vitisai ${onnxruntime_providers_vitisai_cc_srcs})
- onnxruntime_add_include_to_target(onnxruntime_providers_vitisai
- onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface
- )
- add_dependencies(onnxruntime_providers_vitisai ${onnxruntime_EXTERNAL_DEPENDENCIES})
+ onnxruntime_add_include_to_target(onnxruntime_providers_vitisai onnxruntime_common onnxruntime_framework onnx onnx_proto)
+ onnxruntime_add_shared_library(onnxruntime_vitisai_ep ${ONNXRUNTIME_ROOT}/core/providers/vitisai/onnxruntime_vitisai_ep_stub.cc)
+ onnxruntime_add_include_to_target(onnxruntime_vitisai_ep onnxruntime_common)
+ target_include_directories(onnxruntime_vitisai_ep PRIVATE "${ONNXRUNTIME_ROOT}" "${ONNXRUNTIME_ROOT}/core/providers/vitisai/include")
+ target_link_libraries(onnxruntime_providers_vitisai PUBLIC onnxruntime_vitisai_ep PRIVATE onnx protobuf::libprotobuf nlohmann_json::nlohmann_json )
+ target_compile_definitions(onnxruntime_vitisai_ep
+ PRIVATE "-DONNXRUNTIME_VITISAI_EP_STUB=1" "-DONNXRUNTIME_VITISAI_EP_EXPORT_DLL=1")
+ if(NOT MSVC)
+    target_compile_options(onnxruntime_providers_vitisai PUBLIC $<$<CONFIG:DEBUG>:-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0>)
+ endif(NOT MSVC)
+
+ target_include_directories(onnxruntime_providers_vitisai PRIVATE "${ONNXRUNTIME_ROOT}/core/providers/vitisai/include" ${XRT_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}/VitisAI)
+ if(MSVC)
+ target_compile_options(onnxruntime_providers_vitisai PRIVATE "/Zc:__cplusplus")
+ # for dll interface warning.
+ target_compile_options(onnxruntime_providers_vitisai PRIVATE "/wd4251")
+ # for unused formal parameter
+ target_compile_options(onnxruntime_providers_vitisai PRIVATE "/wd4100")
+ else(MSVC)
+ target_compile_options(onnxruntime_providers_vitisai PRIVATE -Wno-unused-parameter)
+ endif(MSVC)
+
set_target_properties(onnxruntime_providers_vitisai PROPERTIES FOLDER "ONNXRuntime")
- target_include_directories(onnxruntime_providers_vitisai PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${VITISAI_INCLUDE_DIR})
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/vitisai
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers
- )
set_target_properties(onnxruntime_providers_vitisai PROPERTIES LINKER_LANGUAGE CXX)
if (NOT onnxruntime_BUILD_SHARED_LIB)
@@ -834,14 +905,15 @@ if (onnxruntime_USE_OPENVINO)
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx)
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
+ install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
set_target_properties(onnxruntime_providers_openvino PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers_openvino PROPERTIES FOLDER "ONNXRuntime")
if(NOT MSVC)
target_compile_options(onnxruntime_providers_openvino PRIVATE "-Wno-parentheses")
endif()
add_dependencies(onnxruntime_providers_openvino onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
- target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${OpenVINO_INCLUDE_DIR} ${OPENVINO_INCLUDE_DIR_LIST} ${PYTHON_INCLUDE_DIRS} $ENV{OPENCL_INCS})
+ target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${OpenVINO_INCLUDE_DIR} ${OPENVINO_INCLUDE_DIR_LIST} ${PYTHON_INCLUDE_DIRS} $ENV{OPENCL_INCS} $ENV{OPENCL_INCS}/../../cl_headers/)
target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS})
target_compile_definitions(onnxruntime_providers_openvino PRIVATE VER_MAJOR=${VERSION_MAJOR_PART})
@@ -983,6 +1055,31 @@ if (onnxruntime_USE_COREML)
endif()
endif()
+if (onnxruntime_USE_WEBNN)
+ if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
+ message(FATAL_ERROR "WebNN EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
+ endif()
+
+ add_compile_definitions(USE_WEBNN=1)
+ if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+ add_definitions(-DENABLE_WEBASSEMBLY_THREADS=1)
+ endif()
+ file(GLOB_RECURSE onnxruntime_providers_webnn_cc_srcs CONFIGURE_DEPENDS
+ "${ONNXRUNTIME_ROOT}/core/providers/webnn/*.h"
+ "${ONNXRUNTIME_ROOT}/core/providers/webnn/*.cc"
+ "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
+ "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
+ )
+
+ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webnn_cc_srcs})
+ onnxruntime_add_static_library(onnxruntime_providers_webnn ${onnxruntime_providers_webnn_cc_srcs})
+ onnxruntime_add_include_to_target(onnxruntime_providers_webnn onnxruntime_common onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+
+ add_dependencies(onnxruntime_providers_webnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
+ set_target_properties(onnxruntime_providers_webnn PROPERTIES FOLDER "ONNXRuntime")
+ set_target_properties(onnxruntime_providers_webnn PROPERTIES LINKER_LANGUAGE CXX)
+endif()
+
if (onnxruntime_USE_NNAPI_BUILTIN)
if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
message(FATAL_ERROR "NNAPI can not be used in a basic minimal build. Please build with '--minimal_build extended'")
@@ -1067,20 +1164,24 @@ if (onnxruntime_USE_NNAPI_BUILTIN)
endif()
endif()
-if (onnxruntime_USE_JS)
- add_compile_definitions(USE_JS=1)
+if (onnxruntime_USE_JSEP)
+ add_compile_definitions(USE_JSEP=1)
file(GLOB_RECURSE onnxruntime_providers_js_cc_srcs
"${ONNXRUNTIME_ROOT}/core/providers/js/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/js/*.cc"
)
+ if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
+ source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_js_contrib_ops_cc_srcs})
+ list(APPEND onnxruntime_providers_js_cc_srcs ${onnxruntime_js_contrib_ops_cc_srcs})
+ endif()
- source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_js_cc_srcs})
+ source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_providers_js_cc_srcs})
onnxruntime_add_static_library(onnxruntime_providers_js ${onnxruntime_providers_js_cc_srcs})
onnxruntime_add_include_to_target(onnxruntime_providers_js
onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers Boost::mp11
)
-
+ target_include_directories(onnxruntime_providers_js PRIVATE ${eigen_INCLUDE_DIRS})
add_dependencies(onnxruntime_providers_js ${onnxruntime_EXTERNAL_DEPENDENCIES})
endif()
@@ -1183,14 +1284,17 @@ if (onnxruntime_USE_DML)
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_dml_cc_srcs})
onnxruntime_add_static_library(onnxruntime_providers_dml ${onnxruntime_providers_dml_cc_srcs})
onnxruntime_add_include_to_target(onnxruntime_providers_dml
- onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface WIL::WIL
+ onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${WIL_TARGET}
)
add_dependencies(onnxruntime_providers_dml ${onnxruntime_EXTERNAL_DEPENDENCIES})
target_include_directories(onnxruntime_providers_dml PRIVATE
${ONNXRUNTIME_ROOT}
)
- add_definitions(-DDML_TARGET_VERSION_USE_LATEST=1)
+ target_compile_definitions(onnxruntime_providers_dml PRIVATE DML_TARGET_VERSION_USE_LATEST=1)
+ if(WIN32)
+ target_compile_options(onnxruntime_providers_dml PRIVATE "/wd4100" "/wd4238" "/wd4189" "/wd4702")
+ endif()
if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
foreach(file "DirectML.dll" "DirectML.pdb" "DirectML.Debug.dll" "DirectML.Debug.pdb")
@@ -1246,8 +1350,8 @@ if (onnxruntime_USE_DML)
target_compile_options(onnxruntime_providers_dml PRIVATE "/W3")
endif()
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dml
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers
+ install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dml/dml_provider_factory.h
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/
)
set_target_properties(onnxruntime_providers_dml PROPERTIES LINKER_LANGUAGE CXX)
@@ -1305,7 +1409,6 @@ if (onnxruntime_USE_MIGRAPHX)
add_dependencies(onnxruntime_providers_migraphx onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
target_link_libraries(onnxruntime_providers_migraphx PRIVATE ${migraphx_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
target_include_directories(onnxruntime_providers_migraphx PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR})
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/migraphx DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
set_target_properties(onnxruntime_providers_migraphx PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers_migraphx PROPERTIES FOLDER "ONNXRuntime")
target_compile_definitions(onnxruntime_providers_migraphx PRIVATE ONNXIFI_BUILD_LIBRARY=1)
@@ -1350,8 +1453,8 @@ if (onnxruntime_USE_ACL)
PRIVATE
${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_ACL_HOME} ${onnxruntime_ACL_HOME}/include
)
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/acl
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers
+ install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/acl/acl_provider_factory.h
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/
)
set_target_properties(onnxruntime_providers_acl PROPERTIES LINKER_LANGUAGE CXX)
@@ -1383,9 +1486,9 @@ if (onnxruntime_USE_ARMNN)
${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_ARMNN_HOME} ${onnxruntime_ARMNN_HOME}/include
${onnxruntime_ACL_HOME} ${onnxruntime_ACL_HOME}/include
)
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/armnn
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers
- )
+ install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/armnn/armnn_provider_factory.h
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
+
set_target_properties(onnxruntime_providers_armnn PROPERTIES LINKER_LANGUAGE CXX)
if (NOT onnxruntime_BUILD_SHARED_LIB)
@@ -1401,7 +1504,7 @@ if (onnxruntime_USE_ROCM)
add_definitions(-DUSE_ROCM=1)
include(onnxruntime_rocm_hipify.cmake)
- list(APPEND CMAKE_PREFIX_PATH ${onnxruntime_ROCM_HOME}/rccl ${onnxruntime_ROCM_HOME}/roctracer)
+ list(APPEND CMAKE_PREFIX_PATH ${onnxruntime_ROCM_HOME})
find_package(HIP)
find_package(hiprand REQUIRED)
@@ -1410,12 +1513,21 @@ if (onnxruntime_USE_ROCM)
# MIOpen version
if(NOT DEFINED ENV{MIOPEN_PATH})
- set(MIOPEN_PATH ${onnxruntime_ROCM_HOME}/miopen)
+ set(MIOPEN_PATH ${onnxruntime_ROCM_HOME})
else()
set(MIOPEN_PATH $ENV{MIOPEN_PATH})
endif()
+ find_path(MIOPEN_VERSION_H_PATH
+ NAMES version.h
+ HINTS
+ ${MIOPEN_PATH}/include/miopen
+ ${MIOPEN_PATH}/miopen/include)
+  if (NOT MIOPEN_VERSION_H_PATH)
+ MESSAGE(FATAL_ERROR "miopen version.h not found")
+ endif()
+ MESSAGE(STATUS "Found miopen version.h at ${MIOPEN_VERSION_H_PATH}")
- file(READ ${MIOPEN_PATH}/include/miopen/version.h MIOPEN_HEADER_CONTENTS)
+ file(READ ${MIOPEN_VERSION_H_PATH}/version.h MIOPEN_HEADER_CONTENTS)
string(REGEX MATCH "define MIOPEN_VERSION_MAJOR * +([0-9]+)"
MIOPEN_VERSION_MAJOR "${MIOPEN_HEADER_CONTENTS}")
string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR * +([0-9]+)" "\\1"
@@ -1535,7 +1647,6 @@ if (onnxruntime_USE_ROCM)
PUBLIC
${onnxruntime_ROCM_HOME}/include
${onnxruntime_ROCM_HOME}/include/roctracer)
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/rocm DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
set_target_properties(onnxruntime_providers_rocm PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers_rocm PROPERTIES FOLDER "ONNXRuntime")
@@ -1565,6 +1676,16 @@ if (onnxruntime_USE_ROCM)
target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_HIPBLASLT)
endif()
+ if (onnxruntime_USE_TRITON_KERNEL)
+ # compile triton kernel, generate .a and .h files
+ include(onnxruntime_compile_triton_kernel.cmake)
+ compile_triton_kernel(triton_kernel_obj_file triton_kernel_header_dir)
+ add_dependencies(onnxruntime_providers_rocm onnxruntime_triton_kernel)
+ target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_TRITON_KERNEL)
+ target_include_directories(onnxruntime_providers_rocm PRIVATE ${triton_kernel_header_dir})
+ target_link_libraries(onnxruntime_providers_rocm PUBLIC -Wl,--whole-archive ${triton_kernel_obj_file} -Wl,--no-whole-archive)
+ endif()
+
if (onnxruntime_USE_COMPOSABLE_KERNEL)
include(composable_kernel)
target_link_libraries(onnxruntime_providers_rocm PRIVATE
@@ -1576,6 +1697,8 @@ if (onnxruntime_USE_ROCM)
device_gemm_instance
device_gemm_add_fastgelu_instance
device_gemm_fastgelu_instance
+ device_gemm_splitk_instance
+ device_gemm_streamk_instance
device_batched_gemm_instance
device_softmax_instance
)
@@ -1652,7 +1775,8 @@ if (onnxruntime_USE_TVM)
endif()
target_compile_definitions(onnxruntime_providers_tvm PUBLIC DMLC_USE_LOGGING_LIBRARY=)
- install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/tvm DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
+ install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/tvm/tvm_provider_factory.h
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
if (NOT onnxruntime_BUILD_SHARED_LIB)
install(TARGETS onnxruntime_providers_tvm
@@ -1678,16 +1802,12 @@ if (onnxruntime_USE_XNNPACK)
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_xnnpack_cc_srcs})
onnxruntime_add_static_library(onnxruntime_providers_xnnpack ${onnxruntime_providers_xnnpack_cc_srcs})
onnxruntime_add_include_to_target(onnxruntime_providers_xnnpack
- onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} XNNPACK pthreadpool Boost::mp11 safeint_interface
+ onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} XNNPACK pthreadpool flatbuffers::flatbuffers Boost::mp11 safeint_interface
)
add_dependencies(onnxruntime_providers_xnnpack onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
set_target_properties(onnxruntime_providers_xnnpack PROPERTIES FOLDER "ONNXRuntime")
- install(DIRECTORY ${ONNXRUNTIME_INCLUDE_DIR}/core/providers/xnnpack
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers
- )
-
set_target_properties(onnxruntime_providers_xnnpack PROPERTIES LINKER_LANGUAGE CXX)
if (NOT onnxruntime_BUILD_SHARED_LIB)
@@ -1697,6 +1817,12 @@ if (onnxruntime_USE_XNNPACK)
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
+
+ # TODO fix shorten-64-to-32 warnings
+ # there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline'
+ if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+ target_compile_options(onnxruntime_providers_xnnpack PRIVATE -Wno-error=shorten-64-to-32)
+ endif()
endif()
if (onnxruntime_USE_CANN)
diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake
index c39960e401652..bf9adbaefabcc 100644
--- a/cmake/onnxruntime_python.cmake
+++ b/cmake/onnxruntime_python.cmake
@@ -20,10 +20,6 @@ file(GLOB onnxruntime_pybind_srcs CONFIGURE_DEPENDS
${onnxruntime_pybind_srcs_pattern}
)
-if(NOT onnxruntime_PYBIND_EXPORT_OPSCHEMA)
- list(REMOVE_ITEM onnxruntime_pybind_srcs ${ONNXRUNTIME_ROOT}/python/onnxruntime_pybind_schema.cc)
-endif()
-
if(onnxruntime_ENABLE_TRAINING)
list(REMOVE_ITEM onnxruntime_pybind_srcs ${ONNXRUNTIME_ROOT}/python/onnxruntime_pybind_module.cc)
endif()
@@ -116,7 +112,7 @@ if (onnxruntime_USE_NCCL)
endif()
if(APPLE)
- set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/python/exported_symbols.lst")
+ set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker -exported_symbols_list -Xlinker ${ONNXRUNTIME_ROOT}/python/exported_symbols.lst")
elseif(UNIX)
if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS)
set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/python/version_script_expose_onnx_protobuf.lds -Xlinker --gc-sections")
@@ -227,7 +223,7 @@ if (MSVC)
# Explicitly use the release version of the python library to make the project file consistent with this.
target_link_libraries(onnxruntime_pybind11_state PRIVATE ${Python_LIBRARY_RELEASE})
elseif (APPLE)
- set_target_properties(onnxruntime_pybind11_state PROPERTIES LINK_FLAGS "${ONNXRUNTIME_SO_LINK_FLAG} -undefined dynamic_lookup")
+ set_target_properties(onnxruntime_pybind11_state PROPERTIES LINK_FLAGS "${ONNXRUNTIME_SO_LINK_FLAG} -Xlinker -undefined -Xlinker dynamic_lookup")
set_target_properties(onnxruntime_pybind11_state PROPERTIES
INSTALL_RPATH "@loader_path"
BUILD_WITH_INSTALL_RPATH TRUE
@@ -286,7 +282,13 @@ if (WIN32)
endif()
get_filename_component(CUDNN_DLL_NAME ${CUDNN_DLL_PATH} NAME_WE)
string(REPLACE "cudnn64_" "" CUDNN_VERSION "${CUDNN_DLL_NAME}")
-
+ if(NOT onnxruntime_CUDA_VERSION)
+ message("Reading json file ${onnxruntime_CUDA_HOME}/version.json")
+ set(CUDA_SDK_JSON_FILE_PATH "${onnxruntime_CUDA_HOME}/version.json")
+ file(READ ${CUDA_SDK_JSON_FILE_PATH} CUDA_SDK_JSON_CONTENT)
+ string(JSON onnxruntime_CUDA_VERSION GET ${CUDA_SDK_JSON_CONTENT} "cuda" "version")
+ message("onnxruntime_CUDA_VERSION=${onnxruntime_CUDA_VERSION}")
+ endif()
file(APPEND "${VERSION_INFO_FILE}"
"cuda_version = \"${onnxruntime_CUDA_VERSION}\"\n"
"cudnn_version = \"${CUDNN_VERSION}\"\n"
@@ -385,8 +387,17 @@ if (onnxruntime_ENABLE_TRAINING)
file(GLOB onnxruntime_python_ortmodule_torch_cpp_ext_fused_ops_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/*"
)
+ file(GLOB onnxruntime_python_ort_triton_srcs CONFIGURE_DEPENDS
+ "${ORTTRAINING_SOURCE_DIR}/python/training/ort_triton/*.py"
+ )
+ file(GLOB onnxruntime_python_ort_triton_kernel_srcs CONFIGURE_DEPENDS
+ "${ORTTRAINING_SOURCE_DIR}/python/training/ort_triton/kernel/*.py"
+ )
+ file(GLOB onnxruntime_python_utils_srcs CONFIGURE_DEPENDS
+ "${ORTTRAINING_SOURCE_DIR}/python/training/utils/*.py"
+ )
file(GLOB onnxruntime_python_utils_data_srcs CONFIGURE_DEPENDS
- "${ORTTRAINING_SOURCE_DIR}/python/training/utils/data/*"
+ "${ORTTRAINING_SOURCE_DIR}/python/training/utils/data/*"
)
file(GLOB onnxruntime_python_utils_hooks_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/python/training/utils/hooks/*"
@@ -455,6 +466,9 @@ file(GLOB onnxruntime_python_transformers_models_bert_src CONFIGURE_DEPENDS
file(GLOB onnxruntime_python_transformers_models_gpt2_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/gpt2/*.py"
)
+file(GLOB onnxruntime_python_transformers_models_llama_src CONFIGURE_DEPENDS
+ "${ONNXRUNTIME_ROOT}/python/tools/transformers/models/llama/*.py"
+)
file(GLOB onnxruntime_python_transformers_models_longformer_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/longformer/*.py"
)
@@ -464,6 +478,9 @@ file(GLOB onnxruntime_python_transformers_models_stable_diffusion_src CONFIGURE_
file(GLOB onnxruntime_python_transformers_models_t5_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/tools/transformers/models/t5/*.py"
)
+file(GLOB onnxruntime_python_transformers_models_whisper_src CONFIGURE_DEPENDS
+ "${ONNXRUNTIME_ROOT}/python/tools/transformers/models/whisper/*.py"
+)
file(GLOB onnxruntime_python_datasets_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/datasets/*.py"
)
@@ -499,6 +516,12 @@ file(GLOB onnxruntime_qdq_helper_srcs CONFIGURE_DEPENDS
${REPO_ROOT}/tools/python/util/qdq_helpers/*.py
)
+if (onnxruntime_USE_OPENVINO)
+ file(GLOB onnxruntime_python_openvino_python_srcs CONFIGURE_DEPENDS
+ ${REPO_ROOT}/tools/python/util/add_openvino_win_libs.py
+ )
+endif()
+
set(build_output_target onnxruntime_common)
if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS)
add_custom_command(
@@ -517,9 +540,11 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models/bart
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models/bert
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models/gpt2
+  COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/transformers/models/llama
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models/longformer
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models/stable_diffusion
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/transformers/models/t5
+  COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/transformers/models/whisper
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization/operators
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization/CalTableFlatBuffers
@@ -607,6 +632,9 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_models_gpt2_src}
$/onnxruntime/transformers/models/gpt2/
+  COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_transformers_models_llama_src}
+      $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/transformers/models/llama/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_models_longformer_src}
$/onnxruntime/transformers/models/longformer/
@@ -616,11 +644,23 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_models_t5_src}
$/onnxruntime/transformers/models/t5/
+  COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_transformers_models_whisper_src}
+      $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/transformers/models/whisper/
COMMAND ${CMAKE_COMMAND} -E copy
${REPO_ROOT}/VERSION_NUMBER
$
)
+if (onnxruntime_USE_OPENVINO)
+ add_custom_command(
+ TARGET onnxruntime_pybind11_state POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy
+ ${onnxruntime_python_openvino_python_srcs}
+      $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/tools/
+ )
+endif()
+
if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS)
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
@@ -639,9 +679,9 @@ endif()
if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS"
- AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Android")
+ AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android"
AND NOT onnxruntime_USE_ROCM
- AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+ AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
@@ -701,6 +741,9 @@ if (onnxruntime_ENABLE_TRAINING)
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/ortmodule/torch_cpp_extensions/cuda/torch_gpu_allocator
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/ortmodule/torch_cpp_extensions/cuda/fused_ops
+    COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/training/ort_triton
+    COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/training/ort_triton/kernel
+    COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/training/utils
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/utils/data/
COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/training/utils/hooks/
COMMAND ${CMAKE_COMMAND} -E copy
@@ -751,6 +794,15 @@ if (onnxruntime_ENABLE_TRAINING)
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_ortmodule_torch_cpp_ext_fused_ops_srcs}
$/onnxruntime/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/
+    COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_ort_triton_srcs}
+      $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/training/ort_triton/
+    COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_ort_triton_kernel_srcs}
+      $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/training/ort_triton/kernel/
+    COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_utils_srcs}
+      $<TARGET_FILE_DIR:onnxruntime_pybind11_state>/onnxruntime/training/utils/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_utils_data_srcs}
$/onnxruntime/training/utils/data/
@@ -777,7 +829,6 @@ if (onnxruntime_ENABLE_TRAINING)
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_api_srcs}
$/onnxruntime/training/api/
-
)
endif()
endif()
@@ -897,10 +948,15 @@ if (onnxruntime_USE_TVM)
endif()
if (onnxruntime_USE_DML)
+ if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
+ set(dml_shared_lib_path ${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}-win/${DML_SHARED_LIB})
+ else()
+ set(dml_shared_lib_path ${DML_PACKAGE_DIR}/bin/${DML_SHARED_LIB})
+ endif()
add_custom_command(
TARGET onnxruntime_pybind11_state POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
- ${DML_PACKAGE_DIR}/bin/${onnxruntime_target_platform}-win/${DML_SHARED_LIB}
+ ${dml_shared_lib_path}
$/onnxruntime/capi/
)
endif()
diff --git a/cmake/onnxruntime_rocm_hipify.cmake b/cmake/onnxruntime_rocm_hipify.cmake
index caaa89c84f5cd..cf71b6bcf7c7d 100644
--- a/cmake/onnxruntime_rocm_hipify.cmake
+++ b/cmake/onnxruntime_rocm_hipify.cmake
@@ -10,6 +10,9 @@ set(contrib_ops_excluded_files
"bert/attention_impl.cu"
"bert/attention_softmax.h"
"bert/attention_softmax.cu"
+ "bert/attention_prepare_qkv.cu"
+ "bert/decoder_attention_impl.h"
+ "bert/decoder_attention_impl.cu"
"bert/decoder_masked_multihead_attention.h"
"bert/decoder_masked_multihead_attention.cc"
"bert/decoder_masked_self_attention.h"
@@ -37,6 +40,10 @@ set(contrib_ops_excluded_files
"bert/packed_attention.cc"
"bert/packed_attention_impl.h"
"bert/packed_attention_impl.cu"
+ "bert/packed_multihead_attention.h"
+ "bert/packed_multihead_attention.cc"
+ "bert/packed_multihead_attention_impl.h"
+ "bert/packed_multihead_attention_impl.cu"
"diffusion/group_norm.cc"
"diffusion/group_norm_impl.cu"
"diffusion/group_norm_impl.h"
@@ -88,8 +95,6 @@ set(contrib_ops_excluded_files
"tensor/image_scaler.h"
"tensor/image_scaler_impl.cu"
"tensor/image_scaler_impl.h"
- "transformers/beam_search.cc"
- "transformers/beam_search.h"
"transformers/greedy_search.cc"
"transformers/greedy_search.h"
"conv_transpose_with_dynamic_pads.cc"
@@ -109,8 +114,6 @@ endif()
set(provider_excluded_files
"atomic/common.cuh"
- "controlflow/if.cc"
- "controlflow/if.h"
"controlflow/loop.cc"
"controlflow/loop.h"
"controlflow/scan.cc"
@@ -199,6 +202,10 @@ set(training_ops_excluded_files
"reduction/reduction_ops.cc" # no double type support
"cuda_training_kernels.cc"
"cuda_training_kernels.h"
+ "nn/conv_shared.cc"
+ "nn/conv_shared.h"
+ "nn/conv_transpose_grad.cc"
+ "nn/conv_transpose_grad.h"
)
function(auto_set_source_files_hip_language)
diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake
index 5120517acf9e7..79bee3bdb65d6 100644
--- a/cmake/onnxruntime_session.cmake
+++ b/cmake/onnxruntime_session.cmake
@@ -30,7 +30,6 @@ endif()
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs})
onnxruntime_add_static_library(onnxruntime_session ${onnxruntime_session_srcs})
-install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/session DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface nlohmann_json::nlohmann_json)
if(onnxruntime_ENABLE_INSTRUMENT)
target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT)
@@ -63,6 +62,7 @@ if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
endif()
if (NOT onnxruntime_BUILD_SHARED_LIB)
+ install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/session DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
install(TARGETS onnxruntime_session
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
diff --git a/cmake/onnxruntime_training.cmake b/cmake/onnxruntime_training.cmake
index 01f143867e44f..f9ba2b341f741 100644
--- a/cmake/onnxruntime_training.cmake
+++ b/cmake/onnxruntime_training.cmake
@@ -22,6 +22,8 @@ file(GLOB_RECURSE onnxruntime_training_srcs
file(GLOB_RECURSE onnxruntime_training_framework_excluded_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
+ "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
)
list(REMOVE_ITEM onnxruntime_training_srcs ${onnxruntime_training_framework_excluded_srcs})
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index c0c50e0e11b08..ec83eb2095071 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -23,7 +23,6 @@ function(AddTest)
else()
onnxruntime_add_executable(${_UT_TARGET} ${_UT_SOURCES})
endif()
-
if (_UT_DEPENDS)
list(REMOVE_DUPLICATES _UT_DEPENDS)
endif(_UT_DEPENDS)
@@ -40,6 +39,9 @@ function(AddTest)
endif()
if (MSVC)
target_compile_options(${_UT_TARGET} PRIVATE "/wd6330")
+ #Abseil has a lot of C4127/C4324 warnings.
+ target_compile_options(${_UT_TARGET} PRIVATE "/wd4127")
+ target_compile_options(${_UT_TARGET} PRIVATE "/wd4324")
endif()
set_target_properties(${_UT_TARGET} PROPERTIES FOLDER "ONNXRuntimeTest")
@@ -61,7 +63,7 @@ function(AddTest)
target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES})
endif()
- onnxruntime_add_include_to_target(${_UT_TARGET} date_interface flatbuffers::flatbuffers)
+ onnxruntime_add_include_to_target(${_UT_TARGET} date::date flatbuffers::flatbuffers)
target_include_directories(${_UT_TARGET} PRIVATE ${TEST_INC_DIR})
if (onnxruntime_USE_CUDA)
target_include_directories(${_UT_TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${onnxruntime_CUDNN_HOME}/include)
@@ -113,7 +115,7 @@ function(AddTest)
set(TEST_ARGS ${_UT_TEST_ARGS})
if (onnxruntime_GENERATE_TEST_REPORTS)
# generate a report file next to the test program
- if (onnxruntime_BUILD_WEBASSEMBLY)
+ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
# WebAssembly use a memory file system, so we do not use full path
list(APPEND TEST_ARGS
"--gtest_output=xml:$.$.results.xml")
@@ -147,7 +149,7 @@ function(AddTest)
else()
target_link_libraries(${_UT_TARGET}_xc PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES})
endif()
- onnxruntime_add_include_to_target(${_UT_TARGET}_xc date_interface flatbuffers::flatbuffers)
+ onnxruntime_add_include_to_target(${_UT_TARGET}_xc date::date flatbuffers::flatbuffers)
target_include_directories(${_UT_TARGET}_xc PRIVATE ${TEST_INC_DIR})
get_target_property(${_UT_TARGET}_DEFS ${_UT_TARGET} COMPILE_DEFINITIONS)
target_compile_definitions(${_UT_TARGET}_xc PRIVATE ${_UT_TARGET}_DEFS)
@@ -162,7 +164,7 @@ function(AddTest)
xctest_add_test(xctest.${_UT_TARGET} ${_UT_TARGET}_xc)
else()
- if (onnxruntime_BUILD_WEBASSEMBLY)
+ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
# We might have already executed the following "find_program" code when we build ORT nodejs binding.
# Then the program is found the result is stored in the variable and the search will not be repeated.
find_program(NPM_CLI
@@ -202,11 +204,15 @@ function(AddTest)
WORKING_DIRECTORY $
)
endif()
+ # Set test timeout to 3 hours.
+ set_tests_properties(${_UT_TARGET} PROPERTIES TIMEOUT 7200)
else()
add_test(NAME ${_UT_TARGET}
COMMAND ${_UT_TARGET} ${TEST_ARGS}
WORKING_DIRECTORY $
)
+ # Set test timeout to 3 hours.
+ set_tests_properties(${_UT_TARGET} PROPERTIES TIMEOUT 7200)
endif()
endif()
endfunction(AddTest)
@@ -280,22 +286,33 @@ if((NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_EXTENDED_MINIMAL_BUILD)
endif()
file(GLOB onnxruntime_test_training_src
+ "${ORTTRAINING_SOURCE_DIR}/test/model/*.h"
"${ORTTRAINING_SOURCE_DIR}/test/model/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/gradient/*.h"
"${ORTTRAINING_SOURCE_DIR}/test/gradient/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/graph/*.h"
"${ORTTRAINING_SOURCE_DIR}/test/graph/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/session/*.h"
"${ORTTRAINING_SOURCE_DIR}/test/session/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/optimizer/*.h"
"${ORTTRAINING_SOURCE_DIR}/test/optimizer/*.cc"
"${ORTTRAINING_SOURCE_DIR}/test/framework/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/distributed/*.h"
"${ORTTRAINING_SOURCE_DIR}/test/distributed/*.cc"
)
-if (onnxruntime_ENABLE_TRAINING_APIS)
- file(GLOB onnxruntime_test_training_api_src
- "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.cc"
- "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.h"
- "${ORTTRAINING_SOURCE_DIR}/test/training_api/core/*.cc"
- "${ORTTRAINING_SOURCE_DIR}/test/training_api/core/*.h"
- )
+# TODO (baijumeswani): Remove the minimal build check here.
+# The training api tests should be runnable even on a minimal build.
+# This requires converting all the *.onnx files to ort format.
+if (NOT onnxruntime_MINIMAL_BUILD)
+ if (onnxruntime_ENABLE_TRAINING_APIS)
+ file(GLOB onnxruntime_test_training_api_src
+ "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.h"
+ "${ORTTRAINING_SOURCE_DIR}/test/training_api/core/*.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/training_api/core/*.h"
+ )
+ endif()
endif()
if(WIN32)
@@ -351,7 +368,7 @@ endif()
list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cpu_src})
if (onnxruntime_USE_CUDA AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
- file(GLOB_RECURSE onnxruntime_test_providers_cuda_src CONFIGURE_DEPENDS
+ file(GLOB onnxruntime_test_providers_cuda_src CONFIGURE_DEPENDS
"${TEST_SRC_DIR}/providers/cuda/*"
)
list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cuda_src})
@@ -364,26 +381,29 @@ if (onnxruntime_USE_CANN)
list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cann_src})
endif()
-if (onnxruntime_ENABLE_TRAINING_APIS)
- file(GLOB_RECURSE orttraining_test_trainingops_cpu_src CONFIGURE_DEPENDS
- "${ORTTRAINING_SOURCE_DIR}/test/training_ops/compare_provider_test_utils.cc"
- "${ORTTRAINING_SOURCE_DIR}/test/training_ops/function_op_test_utils.cc"
- "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cpu/*"
- )
-
- if (NOT onnxruntime_ENABLE_TRAINING)
- list(REMOVE_ITEM orttraining_test_trainingops_cpu_src
- "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cpu/tensorboard/summary_op_test.cc"
+# Disable training ops test for minimal build as a lot of these depend on loading an onnx model.
+if (NOT onnxruntime_MINIMAL_BUILD)
+ if (onnxruntime_ENABLE_TRAINING_OPS)
+ file(GLOB_RECURSE orttraining_test_trainingops_cpu_src CONFIGURE_DEPENDS
+ "${ORTTRAINING_SOURCE_DIR}/test/training_ops/compare_provider_test_utils.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/training_ops/function_op_test_utils.cc"
+ "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cpu/*"
)
- endif()
- list(APPEND onnxruntime_test_providers_src ${orttraining_test_trainingops_cpu_src})
+ if (NOT onnxruntime_ENABLE_TRAINING)
+ list(REMOVE_ITEM orttraining_test_trainingops_cpu_src
+ "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cpu/tensorboard/summary_op_test.cc"
+ )
+ endif()
- if (onnxruntime_USE_CUDA OR onnxruntime_USE_ROCM)
- file(GLOB_RECURSE orttraining_test_trainingops_cuda_src CONFIGURE_DEPENDS
- "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cuda/*"
- )
- list(APPEND onnxruntime_test_providers_src ${orttraining_test_trainingops_cuda_src})
+ list(APPEND onnxruntime_test_providers_src ${orttraining_test_trainingops_cpu_src})
+
+ if (onnxruntime_USE_CUDA OR onnxruntime_USE_ROCM)
+ file(GLOB_RECURSE orttraining_test_trainingops_cuda_src CONFIGURE_DEPENDS
+ "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cuda/*"
+ )
+ list(APPEND onnxruntime_test_providers_src ${orttraining_test_trainingops_cuda_src})
+ endif()
endif()
endif()
@@ -417,7 +437,6 @@ endif()
set (ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR "${TEST_SRC_DIR}/shared_lib")
set (ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR "${TEST_SRC_DIR}/global_thread_pools")
-set (ONNXRUNTIME_API_TESTS_WITHOUT_ENV_SRC_DIR "${TEST_SRC_DIR}/api_tests_without_env")
set (ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR "${TEST_SRC_DIR}/custom_op_registration")
set (ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR "${TEST_SRC_DIR}/logging_apis")
@@ -447,9 +466,6 @@ set (onnxruntime_global_thread_pools_test_SRC
${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_main.cc
${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_inference.cc)
-set (onnxruntime_api_tests_without_env_SRC
- ${ONNXRUNTIME_API_TESTS_WITHOUT_ENV_SRC_DIR}/test_apis_without_env.cc)
-
# tests from lowest level library up.
# the order of libraries should be maintained, with higher libraries being added first in the list
@@ -504,7 +520,7 @@ if(onnxruntime_USE_NNAPI_BUILTIN)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nnapi)
endif()
-if(onnxruntime_USE_JS)
+if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
endif()
@@ -556,6 +572,7 @@ set(ONNXRUNTIME_TEST_LIBS
# CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime
${PROVIDERS_NNAPI}
${PROVIDERS_JS}
+ ${PROVIDERS_VITISAI}
${PROVIDERS_QNN}
${PROVIDERS_SNPE}
${PROVIDERS_RKNPU}
@@ -609,7 +626,7 @@ if(onnxruntime_USE_NNAPI_BUILTIN)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_nnapi)
endif()
-if(onnxruntime_USE_JS)
+if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/js/*)
list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_js)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
@@ -705,6 +722,7 @@ add_dependencies(onnxruntime_test_utils ${onnxruntime_EXTERNAL_DEPENDENCIES})
target_include_directories(onnxruntime_test_utils PUBLIC "${TEST_SRC_DIR}/util/include" PRIVATE
${eigen_INCLUDE_DIRS} ${ONNXRUNTIME_ROOT})
set_target_properties(onnxruntime_test_utils PROPERTIES FOLDER "ONNXRuntimeTest")
+source_group(TREE ${TEST_SRC_DIR} FILES ${onnxruntime_test_utils_src})
set(onnx_test_runner_src_dir ${TEST_SRC_DIR}/onnx)
file(GLOB onnx_test_runner_common_srcs CONFIGURE_DEPENDS
@@ -737,13 +755,22 @@ set_target_properties(onnx_test_runner_common PROPERTIES FOLDER "ONNXRuntimeTest
set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src}
${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src} ${onnxruntime_test_quantiztion_src})
-if(NOT TARGET onnxruntime AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+if(NOT TARGET onnxruntime AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
list(APPEND all_tests ${onnxruntime_shared_lib_test_SRC})
endif()
if (onnxruntime_USE_CUDA)
onnxruntime_add_static_library(onnxruntime_test_cuda_ops_lib ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu)
list(APPEND onnxruntime_test_common_libs onnxruntime_test_cuda_ops_lib)
+ file(GLOB onnxruntime_test_providers_cuda_ut_src CONFIGURE_DEPENDS
+ "${TEST_SRC_DIR}/providers/cuda/test_cases/*"
+ )
+ # onnxruntime_providers_cuda_ut is only for unittests.
+ onnxruntime_add_shared_library_module(onnxruntime_providers_cuda_ut ${onnxruntime_test_providers_cuda_ut_src} $)
+ config_cuda_provider_shared_module(onnxruntime_providers_cuda_ut)
+ onnxruntime_add_include_to_target(onnxruntime_providers_cuda_ut GTest::gtest GTest::gmock)
+ target_link_libraries(onnxruntime_providers_cuda_ut PRIVATE GTest::gtest GTest::gmock)
+ list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_cuda_ut)
endif()
set(all_dependencies ${onnxruntime_test_providers_dependencies} )
@@ -771,7 +798,7 @@ if(WIN32)
list(APPEND onnxruntime_test_providers_libs Advapi32)
endif()
-if (onnxruntime_BUILD_WEBASSEMBLY)
+if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (NOT onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
list(REMOVE_ITEM all_tests
"${TEST_SRC_DIR}/framework/execution_frame_test.cc"
@@ -814,6 +841,18 @@ else()
target_compile_options(onnxruntime_test_all PRIVATE "-Wno-parentheses")
endif()
+# TODO fix shorten-64-to-32 warnings
+# there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline'
+if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+ target_compile_options(onnxruntime_test_all PRIVATE -Wno-error=shorten-64-to-32)
+endif()
+
+if (UNIX AND onnxruntime_USE_TENSORRT)
+ # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+ # simply ignore them for TensorRT EP build
+ set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+endif()
+
if (MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS)
# attention_op_test.cc: Function uses '49152' bytes of stack: exceeds /analyze:stacksize '16384'..
target_compile_options(onnxruntime_test_all PRIVATE "/analyze:stacksize 131072")
@@ -845,15 +884,22 @@ endif()
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
target_link_libraries(onnxruntime_test_all PRIVATE Python::Python)
endif()
-if (onnxruntime_BUILD_WEBASSEMBLY)
+if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js)
- set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=1048576 -s ALLOW_MEMORY_GROWTH=1 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1")
+ set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
- set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s USE_PTHREADS=1 -s PROXY_TO_PTHREAD=1")
+ set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1")
endif()
- if (onnxruntime_USE_JS)
+ if (onnxruntime_USE_JSEP)
set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " --pre-js \"${ONNXRUNTIME_ROOT}/wasm/js_internal_api.js\"")
endif()
+
+ ###
+ ### if you want to investigate or debug a test failure in onnxruntime_test_all, replace the following line.
+ ### those flags slow down the CI test significantly, so we don't use them by default.
+ ###
+ # set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=2")
+ set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=0 -s SAFE_HEAP=0 -s STACK_OVERFLOW_CHECK=1")
endif()
if (onnxruntime_ENABLE_ATEN)
@@ -866,12 +912,6 @@ onnxruntime_add_static_library(onnx_test_data_proto ${TEST_SRC_DIR}/proto/tml.pr
add_dependencies(onnx_test_data_proto onnx_proto ${onnxruntime_EXTERNAL_DEPENDENCIES})
#onnx_proto target should mark this definition as public, instead of private
target_compile_definitions(onnx_test_data_proto PRIVATE "-DONNX_API=")
-if(WIN32)
- target_compile_options(onnx_test_data_proto PRIVATE "/wd4100" "/wd4125" "/wd4127" "/wd4267" "/wd4456" "/wd4800" "/wd6011" "/wd6387" "/wd28182")
-else()
- #Once we upgrade protobuf to 3.17.3+, we can remove this
- target_compile_options(onnx_test_data_proto PRIVATE "-Wno-unused-parameter")
-endif()
onnxruntime_add_include_to_target(onnx_test_data_proto onnx_proto)
target_include_directories(onnx_test_data_proto PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
set_target_properties(onnx_test_data_proto PROPERTIES FOLDER "ONNXRuntimeTest")
@@ -1002,9 +1042,9 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO"
)
endif()
-if (onnxruntime_BUILD_WEBASSEMBLY)
+if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
- set_target_properties(onnx_test_runner PROPERTIES LINK_FLAGS "-s NODERAWFS=1 -s ALLOW_MEMORY_GROWTH=1 -s USE_PTHREADS=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
+ set_target_properties(onnx_test_runner PROPERTIES LINK_FLAGS "-s NODERAWFS=1 -s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
else()
set_target_properties(onnx_test_runner PROPERTIES LINK_FLAGS "-s NODERAWFS=1 -s ALLOW_MEMORY_GROWTH=1")
endif()
@@ -1087,7 +1127,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
else()
target_link_libraries(onnxruntime_mlas_benchmark PRIVATE nsync::nsync_cpp ${CMAKE_DL_LIBS})
endif()
- if (CPUINFO_SUPPORTED AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+ if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
target_link_libraries(onnxruntime_mlas_benchmark PRIVATE cpuinfo)
endif()
set_target_properties(onnxruntime_mlas_benchmark PROPERTIES FOLDER "ONNXRuntimeTest")
@@ -1097,7 +1137,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
target_compile_options(onnx_test_runner_common PRIVATE -D_CRT_SECURE_NO_WARNINGS)
endif()
- if (NOT onnxruntime_REDUCED_OPS_BUILD AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+ if (NOT onnxruntime_REDUCED_OPS_BUILD AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
add_test(NAME onnx_test_pytorch_converted
COMMAND onnx_test_runner ${onnx_SOURCE_DIR}/onnx/backend/test/data/pytorch-converted)
add_test(NAME onnx_test_pytorch_operator
@@ -1211,6 +1251,9 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_providers_snpe)
endif()
endif()
+ if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ list(APPEND onnxruntime_shared_lib_test_LIBS cpuinfo)
+ endif()
if (onnxruntime_USE_CUDA)
list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_test_cuda_ops_lib cudart)
endif()
@@ -1244,6 +1287,12 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
$/testdata)
endif()
+ if (UNIX AND onnxruntime_USE_TENSORRT)
+ # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+ # simply ignore them for TensorRT EP build
+ set_property(TARGET onnxruntime_shared_lib_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+ endif()
+
# test inference using global threadpools
if (NOT CMAKE_SYSTEM_NAME MATCHES "Android|iOS" AND NOT onnxruntime_MINIMAL_BUILD)
AddTest(DYN
@@ -1253,16 +1302,6 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
DEPENDS ${all_dependencies}
)
endif()
-
- # A separate test is needed to test the APIs that don't rely on the env being created first.
- if (NOT CMAKE_SYSTEM_NAME MATCHES "Android|iOS")
- AddTest(DYN
- TARGET onnxruntime_api_tests_without_env
- SOURCES ${onnxruntime_api_tests_without_env_SRC}
- LIBS ${onnxruntime_shared_lib_test_LIBS}
- DEPENDS ${all_dependencies}
- )
- endif()
endif()
# the debug node IO functionality uses static variables, so it is best tested
@@ -1274,7 +1313,14 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
"${TEST_SRC_DIR}/debug_node_inputs_outputs/debug_node_inputs_outputs_utils_test.cc"
"${TEST_SRC_DIR}/framework/TestAllocatorManager.cc"
"${TEST_SRC_DIR}/framework/test_utils.cc"
- "${TEST_SRC_DIR}/providers/provider_test_utils.cc"
+ "${TEST_SRC_DIR}/providers/base_tester.h"
+ "${TEST_SRC_DIR}/providers/base_tester.cc"
+ "${TEST_SRC_DIR}/providers/checkers.h"
+ "${TEST_SRC_DIR}/providers/checkers.cc"
+ "${TEST_SRC_DIR}/providers/op_tester.h"
+ "${TEST_SRC_DIR}/providers/op_tester.cc"
+ "${TEST_SRC_DIR}/providers/provider_test_utils.h"
+ "${TEST_SRC_DIR}/providers/tester_types.h"
${onnxruntime_unittest_main_src}
LIBS ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs}
DEPENDS ${all_dependencies}
@@ -1327,7 +1373,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT}
${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(onnxruntime_mlas_test PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)
- if (CPUINFO_SUPPORTED AND NOT onnxruntime_BUILD_WEBASSEMBLY)
+ if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
target_link_libraries(onnxruntime_mlas_test PRIVATE cpuinfo)
endif()
if(NOT WIN32)
@@ -1346,16 +1392,21 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
target_link_libraries(onnxruntime_mlas_test PRIVATE Threads::Threads)
set_target_properties(onnxruntime_mlas_test PROPERTIES FOLDER "ONNXRuntimeTest")
- if (onnxruntime_BUILD_WEBASSEMBLY)
+ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
- set_target_properties(onnxruntime_mlas_test PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1 -s USE_PTHREADS=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
+ set_target_properties(onnxruntime_mlas_test PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
else()
set_target_properties(onnxruntime_mlas_test PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1")
endif()
endif()
# Training API Tests
- if (onnxruntime_ENABLE_TRAINING_APIS)
+ # Disabling training_api_test_trainer. CXXOPT generates a ton of warnings because of which nuget pipeline is failing.
+ # TODO(askhade): Fix the warnings.
+ # This has no impact on the release as the release package and the pipeline, both do not use this.
+ # This is used by devs for testing training apis.
+ #if (onnxruntime_ENABLE_TRAINING_APIS)
+ if (0)
# Only files in the trainer and common folder will be compiled into test trainer.
file(GLOB training_api_test_trainer_src
"${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.cc"
@@ -1407,20 +1458,41 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
endif()
endif()
-if (NOT onnxruntime_BUILD_WEBASSEMBLY)
+if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+
+ set(custom_op_src_patterns
+ "${TEST_SRC_DIR}/testdata/custom_op_library/*.h"
+ "${TEST_SRC_DIR}/testdata/custom_op_library/*.cc"
+ "${TEST_SRC_DIR}/testdata/custom_op_library/cpu/cpu_ops.*"
+ )
+
+ set(custom_op_lib_include ${REPO_ROOT}/include)
+ set(custom_op_lib_option)
+ set(custom_op_lib_link ${GSL_TARGET})
+
if (onnxruntime_USE_CUDA)
- onnxruntime_add_shared_library(custom_op_library ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu
- ${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.cc)
- target_include_directories(custom_op_library PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+ list(APPEND custom_op_src_patterns
+ "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu"
+ "${TEST_SRC_DIR}/testdata/custom_op_library/cuda/cuda_ops.*")
+ list(APPEND custom_op_lib_include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${onnxruntime_CUDNN_HOME}/include)
if (HAS_QSPECTRE)
- target_compile_options(custom_op_library PRIVATE "$<$:SHELL:-Xcompiler /Qspectre>")
+ list(APPEND custom_op_lib_option "$<$:SHELL:-Xcompiler /Qspectre>")
endif()
- else()
- onnxruntime_add_shared_library(custom_op_library ${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.cc)
endif()
- target_include_directories(custom_op_library PRIVATE ${REPO_ROOT}/include)
- target_link_libraries(custom_op_library PRIVATE ${GSL_TARGET})
+ if (onnxruntime_USE_ROCM)
+ list(APPEND custom_op_src_patterns
+ "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/rocm_ops.hip"
+ "${TEST_SRC_DIR}/testdata/custom_op_library/rocm/rocm_ops.*")
+ list(APPEND custom_op_lib_include ${onnxruntime_ROCM_HOME}/include)
+ list(APPEND custom_op_lib_option "-D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1")
+ endif()
+
+ file(GLOB custom_op_src ${custom_op_src_patterns})
+ onnxruntime_add_shared_library(custom_op_library ${custom_op_src})
+ target_compile_options(custom_op_library PRIVATE ${custom_op_lib_option})
+ target_include_directories(custom_op_library PRIVATE ${REPO_ROOT}/include ${custom_op_lib_include})
+ target_link_libraries(custom_op_library PRIVATE ${GSL_TARGET} ${custom_op_lib_link})
if(UNIX)
if (APPLE)
@@ -1480,6 +1552,12 @@ if (NOT onnxruntime_BUILD_WEBASSEMBLY)
${ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR}/test_registercustomops.cc)
set(onnxruntime_customopregistration_test_LIBS custom_op_library onnxruntime_common onnxruntime_test_utils)
+ if (NOT WIN32)
+ list(APPEND onnxruntime_customopregistration_test_LIBS nsync::nsync_cpp)
+ endif()
+ if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+ list(APPEND onnxruntime_customopregistration_test_LIBS cpuinfo)
+ endif()
if (onnxruntime_USE_TENSORRT)
list(APPEND onnxruntime_customopregistration_test_LIBS ${TENSORRT_LIBRARY_INFER})
endif()
@@ -1497,11 +1575,18 @@ if (NOT onnxruntime_BUILD_WEBASSEMBLY)
${TEST_DATA_SRC}
$/testdata)
endif()
+
+ if (UNIX AND onnxruntime_USE_TENSORRT)
+ # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+ # simply ignore them for TensorRT EP build
+ set_property(TARGET onnxruntime_customopregistration_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+ endif()
+
endif()
endif()
# Build custom op library that returns an error OrtStatus when the exported RegisterCustomOps function is called.
-if (NOT onnxruntime_BUILD_WEBASSEMBLY AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
+if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
onnxruntime_add_shared_library_module(custom_op_invalid_library
${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.h
${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.cc)
@@ -1524,7 +1609,7 @@ if (NOT onnxruntime_BUILD_WEBASSEMBLY AND (NOT onnxruntime_MINIMAL_BUILD OR onnx
${ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG})
endif()
-if (NOT onnxruntime_BUILD_WEBASSEMBLY AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
+if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
file(GLOB_RECURSE custom_op_get_const_input_test_library_src
"${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.cc"
@@ -1555,7 +1640,7 @@ if (NOT onnxruntime_BUILD_WEBASSEMBLY AND (NOT onnxruntime_MINIMAL_BUILD OR onnx
${ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG})
endif()
-if (onnxruntime_BUILD_SHARED_LIB AND NOT onnxruntime_BUILD_WEBASSEMBLY AND NOT onnxruntime_MINIMAL_BUILD)
+if (onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT onnxruntime_MINIMAL_BUILD)
set (onnxruntime_logging_apis_test_SRC
${ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR}/test_logging_apis.cc)
@@ -1573,7 +1658,7 @@ if (onnxruntime_BUILD_SHARED_LIB AND NOT onnxruntime_BUILD_WEBASSEMBLY AND NOT o
)
endif()
-if (NOT onnxruntime_BUILD_WEBASSEMBLY AND onnxruntime_USE_OPENVINO AND (NOT onnxruntime_MINIMAL_BUILD OR
+if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_USE_OPENVINO AND (NOT onnxruntime_MINIMAL_BUILD OR
onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
onnxruntime_add_shared_library_module(custom_op_openvino_wrapper_library
${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.cc
@@ -1601,8 +1686,8 @@ endif()
# limit to only test on windows first, due to a runtime path issue on linux
if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS"
- AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Android")
- AND NOT onnxruntime_BUILD_WEBASSEMBLY
+ AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android"
+ AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten"
AND NOT onnxruntime_USE_ROCM)
file(GLOB_RECURSE test_execution_provider_srcs
"${REPO_ROOT}/onnxruntime/test/testdata/custom_execution_provider_library/*.h"
diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake
index 80a44ffb3fa63..c6510c97a617e 100644
--- a/cmake/onnxruntime_webassembly.cmake
+++ b/cmake/onnxruntime_webassembly.cmake
@@ -110,6 +110,7 @@ if (onnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB)
onnxruntime_providers
${PROVIDERS_JS}
${PROVIDERS_XNNPACK}
+ ${PROVIDERS_WEBNN}
onnxruntime_session
onnxruntime_util
re2::re2
@@ -186,6 +187,7 @@ else()
onnxruntime_providers
${PROVIDERS_JS}
${PROVIDERS_XNNPACK}
+ ${PROVIDERS_WEBNN}
onnxruntime_session
onnxruntime_util
re2::re2
@@ -194,13 +196,17 @@ else()
target_link_libraries(onnxruntime_webassembly PRIVATE XNNPACK)
endif()
+ if(onnxruntime_USE_WEBNN)
+ target_link_libraries(onnxruntime_webassembly PRIVATE onnxruntime_providers_webnn)
+ endif()
+
if (onnxruntime_ENABLE_TRAINING)
target_link_libraries(onnxruntime_webassembly PRIVATE tensorboard)
endif()
set(EXPORTED_RUNTIME_METHODS "['stackAlloc','stackRestore','stackSave','UTF8ToString','stringToUTF8','lengthBytesUTF8']")
- if (onnxruntime_USE_JS)
- set(EXPORTED_FUNCTIONS "_malloc,_free,_JsepOutput")
+ if (onnxruntime_USE_JSEP)
+ set(EXPORTED_FUNCTIONS "_malloc,_free,_JsepOutput,_JsepGetNodeName")
else()
set(EXPORTED_FUNCTIONS "_malloc,_free")
endif()
@@ -219,17 +225,18 @@ else()
--no-entry
)
- if (onnxruntime_USE_JS)
+ if (onnxruntime_USE_JSEP)
# NOTE: "-s ASYNCIFY=1" is required for JSEP to work with WebGPU
# This flag allows async functions to be called from sync functions, in the cost of binary size and
# build time. See https://emscripten.org/docs/porting/asyncify.html for more details.
- target_compile_definitions(onnxruntime_webassembly PRIVATE USE_JS=1)
+ target_compile_definitions(onnxruntime_webassembly PRIVATE USE_JSEP=1)
target_link_options(onnxruntime_webassembly PRIVATE
--pre-js "${ONNXRUNTIME_ROOT}/wasm/js_internal_api.js"
"SHELL:-s ASYNCIFY=1"
"SHELL:-s ASYNCIFY_STACK_SIZE=65536"
)
+ set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/js_internal_api.js)
endif()
if (onnxruntime_EMSCRIPTEN_SETTINGS)
@@ -242,7 +249,7 @@ else()
target_link_options(onnxruntime_webassembly PRIVATE
"SHELL:-s ASSERTIONS=2"
"SHELL:-s SAFE_HEAP=1"
- "SHELL:-s STACK_OVERFLOW_CHECK=1"
+ "SHELL:-s STACK_OVERFLOW_CHECK=2"
"SHELL:-s DEMANGLE_SUPPORT=1"
)
else()
@@ -255,6 +262,10 @@ else()
)
endif()
+ if (onnxruntime_USE_WEBNN)
+ set_property(TARGET onnxruntime_webassembly APPEND_STRING PROPERTY LINK_FLAGS " --bind -sWASM_BIGINT")
+ endif()
+
# Set link flag to enable exceptions support, this will override default disabling exception throwing behavior when disable exceptions.
target_link_options(onnxruntime_webassembly PRIVATE "SHELL:-s DISABLE_EXCEPTION_THROWING=0")
@@ -265,21 +276,31 @@ else()
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
target_link_options(onnxruntime_webassembly PRIVATE
"SHELL:-s EXPORT_NAME=ortWasmThreaded"
- "SHELL:-s USE_PTHREADS=1"
+ "SHELL:-s DEFAULT_PTHREAD_STACK_SIZE=131072"
)
- if (onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
- set_target_properties(onnxruntime_webassembly PROPERTIES OUTPUT_NAME "ort-wasm-simd-threaded")
- else()
- set_target_properties(onnxruntime_webassembly PROPERTIES OUTPUT_NAME "ort-wasm-threaded")
- endif()
else()
target_link_options(onnxruntime_webassembly PRIVATE
"SHELL:-s EXPORT_NAME=ortWasm"
)
- if (onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
- set_target_properties(onnxruntime_webassembly PROPERTIES OUTPUT_NAME "ort-wasm-simd")
- else()
- set_target_properties(onnxruntime_webassembly PROPERTIES OUTPUT_NAME "ort-wasm")
- endif()
endif()
+
+ set(target_name_list ort)
+
+ if (onnxruntime_ENABLE_TRAINING_APIS)
+ list(APPEND target_name_list "training")
+ endif()
+
+ list(APPEND target_name_list "wasm")
+
+ if (onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
+ list(APPEND target_name_list "simd")
+ endif()
+
+ if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+ list(APPEND target_name_list "threaded")
+ endif()
+
+ list(JOIN target_name_list "-" target_name)
+
+ set_target_properties(onnxruntime_webassembly PROPERTIES OUTPUT_NAME ${target_name})
endif()
diff --git a/cmake/patches/composable_kernel/Fix_Clang_Build.patch b/cmake/patches/composable_kernel/Fix_Clang_Build.patch
index ae2a015c1d0ea..d564ffba914fe 100644
--- a/cmake/patches/composable_kernel/Fix_Clang_Build.patch
+++ b/cmake/patches/composable_kernel/Fix_Clang_Build.patch
@@ -1,5 +1,5 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
-index f861e3020..f0b6bceae 100644
+index 514b98fde..59c8a568a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
@@ -11,7 +11,7 @@ index f861e3020..f0b6bceae 100644
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
-@@ -41,27 +41,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
+@@ -94,27 +94,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
@@ -39,7 +39,7 @@ index f861e3020..f0b6bceae 100644
## HIP
find_package(HIP REQUIRED)
# Override HIP version in config.h, if necessary.
-@@ -83,8 +62,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
+@@ -136,8 +115,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
message(STATUS "CK_HIP_VERSION_PATCH overriden with ${CK_OVERRIDE_HIP_VERSION_PATCH}")
endif()
message(STATUS "Build with HIP ${HIP_VERSION}")
@@ -48,17 +48,7 @@ index f861e3020..f0b6bceae 100644
## tidy
include(EnableCompilerWarnings)
-@@ -273,9 +250,6 @@ rocm_package_setup_component(profiler
- )
-
- add_subdirectory(library)
--add_subdirectory(example)
--add_subdirectory(test)
--add_subdirectory(profiler)
-
- #Create an interface target for the include only files and call it "composablekernels"
- include(CMakePackageConfigHelpers)
-@@ -301,11 +275,3 @@ rocm_install(FILES
+@@ -391,11 +368,3 @@ rocm_install(FILES
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
@@ -71,7 +61,7 @@ index f861e3020..f0b6bceae 100644
- HEADER_ONLY
-)
diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
-index c206c4dc0..b283eeb64 100644
+index 1d54a141b..4edd7dbfb 100644
--- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
+++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
@@ -1,7 +1,13 @@
diff --git a/cmake/patches/flatbuffers/flatbuffers.patch b/cmake/patches/flatbuffers/flatbuffers.patch
new file mode 100644
index 0000000000000..fb2678ef1bdce
--- /dev/null
+++ b/cmake/patches/flatbuffers/flatbuffers.patch
@@ -0,0 +1,36 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 3987eac9..5e5462f1 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -223,7 +223,7 @@ elseif(CMAKE_COMPILER_IS_GNUCXX)
+ "${CMAKE_CXX_FLAGS} -std=c++0x")
+ endif(CYGWIN)
+ set(CMAKE_CXX_FLAGS
+- "${CMAKE_CXX_FLAGS} -Wall -pedantic -Werror -Wextra -Werror=shadow")
++ "${CMAKE_CXX_FLAGS} -Wall -pedantic -Werror -Wextra -Werror=shadow -Wno-error=stringop-overflow")
+ set(FLATBUFFERS_PRIVATE_CXX_FLAGS "-Wold-style-cast")
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.4)
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+diff --git a/src/idl_gen_rust.cpp b/src/idl_gen_rust.cpp
+index 55b8439b..dc03e8a8 100644
+--- a/src/idl_gen_rust.cpp
++++ b/src/idl_gen_rust.cpp
+@@ -406,7 +406,8 @@ class RustGenerator : public BaseGenerator {
+ // example: f(A, D::E) -> super::D::E
+ // does not include leaf object (typically a struct type).
+
+- size_t i = 0;
++ // fix unused but set variable warning
++ //size_t i = 0;
+ std::stringstream stream;
+
+ auto s = src->components.begin();
+@@ -417,7 +418,7 @@ class RustGenerator : public BaseGenerator {
+ if (*s != *d) { break; }
+ ++s;
+ ++d;
+- ++i;
++ //++i;
+ }
+
+ for (; s != src->components.end(); ++s) { stream << "super::"; }
diff --git a/cmake/patches/onnx/onnx.patch b/cmake/patches/onnx/onnx.patch
index d261a178c6b3d..155d153019f85 100644
--- a/cmake/patches/onnx/onnx.patch
+++ b/cmake/patches/onnx/onnx.patch
@@ -35,25 +35,45 @@ diff --git a/onnx/onnx_pb.h b/onnx/onnx_pb.h
index 0aab3e26..0f859267 100644
--- a/onnx/onnx_pb.h
+++ b/onnx/onnx_pb.h
-@@ -47,10 +47,21 @@
+@@ -47,10 +47,28 @@
#define ONNX_API ONNX_IMPORT
#endif
-
-+// onnx/onnx-operators-ml.pb.h:1178:25: required from here
-+// protobuf/src/google/protobuf/repeated_ptr_field.h:752:66: error: unused parameter ‘prototype’ [-Werror=unused-parameter]
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic push
-+#pragma GCC diagnostic ignored "-Wunused-parameter"
++
++// In file included from onnx/onnx-ml.pb.h:30:
++// In file included from google/protobuf/extension_set.h:53:
++// google/protobuf/parse_context.h:328:47: error: implicit conversion loses integer precision: 'long' to 'int' [-Werror,-Wshorten-64-to-32]
++#if defined(__has_warning)
++#if __has_warning("-Wshorten-64-to-32")
++#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
+#endif
++#endif // defined(__has_warning)
++
++#endif // defined(__GNUC__)
+
#ifdef ONNX_ML
#include "onnx/onnx-ml.pb.h"
#else
#include "onnx/onnx.pb.h"
#endif
-
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+
#endif // ! ONNX_ONNX_PB_H
+diff --git a/onnx/checker.cc b/onnx/checker.cc
+index 8fdaf037..1beb1b88 100644
+--- a/onnx/checker.cc
++++ b/onnx/checker.cc
+@@ -190,7 +190,7 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) {
+ }
+ std::string data_path = path_join(ctx.get_model_dir(), relative_path);
+ // use stat64 to check whether the file exists
+-#ifdef __APPLE__
++#if defined(__APPLE__) || defined(__wasm__)
+ struct stat buffer; // APPLE does not have stat64
+ if (stat((data_path).c_str(), &buffer) != 0) {
+ #else
diff --git a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch
index 7296f2f30f286..37bdbf9fb53f6 100644
--- a/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch
+++ b/cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch
@@ -1,8 +1,8 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
-index d53c48aa1..4c987bd7a 100755
+index d53c48aa1..77c3cf983 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
-@@ -105,7 +105,7 @@ ENDIF()
+@@ -105,22 +105,12 @@ ENDIF()
IF(NOT CMAKE_SYSTEM_NAME)
MESSAGE(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined")
@@ -11,7 +11,22 @@ index d53c48aa1..4c987bd7a 100755
MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME = ${CMAKE_SYSTEM_NAME}")
ENDIF()
-@@ -7108,6 +7108,10 @@ IF(MSVC)
+ # ---[ Download deps
+ IF(NOT XNNPACK_USE_SYSTEM_LIBS)
+- IF(NOT DEFINED CLOG_SOURCE_DIR)
+- MESSAGE(STATUS "Downloading clog to ${CMAKE_BINARY_DIR}/clog-source (define CLOG_SOURCE_DIR to avoid it)")
+- CONFIGURE_FILE(cmake/DownloadCLog.cmake "${CMAKE_BINARY_DIR}/clog-download/CMakeLists.txt")
+- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
+- WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/clog-download")
+- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
+- WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/clog-download")
+- SET(CLOG_SOURCE_DIR "${CMAKE_BINARY_DIR}/clog-source" CACHE STRING "clog source directory")
+- ENDIF()
+-
+ IF(NOT DEFINED CPUINFO_SOURCE_DIR)
+ MESSAGE(STATUS "Downloading cpuinfo to ${CMAKE_BINARY_DIR}/cpuinfo-source (define CPUINFO_SOURCE_DIR to avoid it)")
+ CONFIGURE_FILE(cmake/DownloadCpuinfo.cmake "${CMAKE_BINARY_DIR}/cpuinfo-download/CMakeLists.txt")
+@@ -7108,6 +7098,10 @@ IF(MSVC)
SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$>: /O2 >")
SET_PROPERTY(SOURCE ${HOT_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$>: /O2 >")
SET_PROPERTY(SOURCE ${COLD_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$>: /O1 >")
@@ -22,3 +37,30 @@ index d53c48aa1..4c987bd7a 100755
ELSE()
SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$>: -O2 >")
SET_PROPERTY(SOURCE ${HOT_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS "$<$>: -O2 >")
+@@ -7142,26 +7136,6 @@ IF(LIBM)
+ TARGET_LINK_LIBRARIES(indirection PRIVATE ${LIBM})
+ ENDIF()
+
+-# ---[ Configure clog
+-IF(NOT TARGET clog)
+- IF(NOT XNNPACK_USE_SYSTEM_LIBS)
+- SET(CLOG_BUILD_TESTS OFF CACHE BOOL "")
+- SET(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "")
+- ADD_SUBDIRECTORY(
+- "${CLOG_SOURCE_DIR}/deps/clog"
+- "${CMAKE_BINARY_DIR}/clog")
+- # We build static version of clog but a dynamic library may indirectly depend on it
+- SET_PROPERTY(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON)
+- ELSE()
+- ADD_LIBRARY(clog STATIC IMPORTED)
+- FIND_LIBRARY(CLOG_LIBRARY clog)
+- IF(NOT CLOG_LIBRARY)
+- MESSAGE(FATAL_ERROR "Cannot find clog")
+- ENDIF()
+- SET_PROPERTY(TARGET clog PROPERTY IMPORTED_LOCATION "${CLOG_LIBRARY}")
+- ENDIF()
+-ENDIF()
+-
+ # ---[ Configure cpuinfo
+ IF(NOT TARGET cpuinfo)
+ IF(NOT XNNPACK_USE_SYSTEM_LIBS)
diff --git a/cmake/winml.cmake b/cmake/winml.cmake
index 4186cbefacf74..395996f0fa4b9 100644
--- a/cmake/winml.cmake
+++ b/cmake/winml.cmake
@@ -202,7 +202,7 @@ add_dependencies(winml_lib_telemetry winml_api_native)
add_dependencies(winml_lib_telemetry winml_api_native_internal)
# Link libraries
-target_link_libraries(winml_lib_telemetry PRIVATE WIL::WIL)
+target_link_libraries(winml_lib_telemetry PRIVATE ${WIL_TARGET})
###########################
# Add winml_lib_ort
@@ -282,7 +282,7 @@ add_dependencies(winml_lib_ort winml_api_native_internal)
if (onnxruntime_USE_DML)
target_add_dml(winml_lib_ort)
endif()
-target_link_libraries(winml_lib_ort PRIVATE WIL::WIL)
+target_link_libraries(winml_lib_ort PRIVATE ${WIL_TARGET})
target_link_libraries(winml_lib_ort INTERFACE winml_lib_api)
target_link_libraries(winml_lib_ort INTERFACE winml_lib_telemetry)
@@ -339,7 +339,7 @@ set_target_properties(winml_adapter
${target_folder})
# Link libraries
-target_link_libraries(winml_adapter PRIVATE WIL::WIL)
+target_link_libraries(winml_adapter PRIVATE ${WIL_TARGET})
if (onnxruntime_USE_DML)
target_add_dml(winml_adapter)
endif()
@@ -423,7 +423,7 @@ add_dependencies(winml_lib_image winml_api_native)
add_dependencies(winml_lib_image winml_api_native_internal)
# Link libraries
-target_link_libraries(winml_lib_image PRIVATE dxgi d3d11 d3d12 WIL::WIL winml_lib_common)
+target_link_libraries(winml_lib_image PRIVATE dxgi d3d11 d3d12 ${WIL_TARGET} winml_lib_common)
get_target_property(winml_lib_image_include_directories winml_lib_image INCLUDE_DIRECTORIES)
@@ -531,7 +531,7 @@ add_dependencies(winml_lib_api winml_api_native)
add_dependencies(winml_lib_api winml_api_native_internal)
# Link libraries
-target_link_libraries(winml_lib_api PRIVATE WIL::WIL winml_lib_telemetry)
+target_link_libraries(winml_lib_api PRIVATE ${WIL_TARGET} winml_lib_telemetry)
if (onnxruntime_USE_DML)
target_add_dml(winml_lib_api)
endif(onnxruntime_USE_DML)
@@ -619,7 +619,7 @@ add_dependencies(winml_lib_api_experimental winml_api_native_internal)
add_dependencies(winml_lib_api_experimental winml_api_experimental)
# Link libraries
-target_link_libraries(winml_lib_api_experimental PRIVATE WIL::WIL winml_lib_telemetry)
+target_link_libraries(winml_lib_api_experimental PRIVATE ${WIL_TARGET} winml_lib_telemetry)
if (onnxruntime_USE_DML)
target_add_dml(winml_lib_api_experimental)
endif(onnxruntime_USE_DML)
@@ -648,7 +648,7 @@ onnxruntime_add_static_library(winml_lib_common
set_target_properties(winml_lib_common PROPERTIES CXX_STANDARD 17)
set_target_properties(winml_lib_common PROPERTIES CXX_STANDARD_REQUIRED ON)
target_compile_options(winml_lib_common PRIVATE /GR- /await /bigobj /wd4238)
-target_link_libraries(winml_lib_common PRIVATE WIL::WIL)
+target_link_libraries(winml_lib_common PRIVATE ${WIL_TARGET})
target_include_directories(winml_lib_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/winml_api)
# Compiler flags
@@ -786,7 +786,7 @@ add_dependencies(winml_dll winml_api_native_internal)
# Link libraries
target_link_libraries(winml_dll PRIVATE re2)
-target_link_libraries(winml_dll PRIVATE WIL::WIL)
+target_link_libraries(winml_dll PRIVATE ${WIL_TARGET})
target_link_libraries(winml_dll PRIVATE winml_lib_api)
if (NOT winml_is_inbox)
target_link_libraries(winml_dll PRIVATE winml_lib_api_experimental)
diff --git a/cmake/winml_unittests.cmake b/cmake/winml_unittests.cmake
index 561bd005dc383..b655e60a8aec9 100644
--- a/cmake/winml_unittests.cmake
+++ b/cmake/winml_unittests.cmake
@@ -49,7 +49,10 @@ function(add_winml_test)
if (_UT_DEPENDS)
add_dependencies(${_UT_TARGET} ${_UT_DEPENDS})
endif()
- target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} gtest winml_google_test_lib ${onnxruntime_EXTERNAL_LIBRARIES} winml_lib_common onnxruntime windowsapp.lib)
+ target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest winml_google_test_lib ${onnxruntime_EXTERNAL_LIBRARIES} winml_lib_common onnxruntime windowsapp.lib)
+ #Abseil has a lot of C4127/C4324 warnings.
+ target_compile_options(${_UT_TARGET} PRIVATE "/wd4127")
+ target_compile_options(${_UT_TARGET} PRIVATE "/wd4324")
target_compile_options(${_UT_TARGET} PRIVATE /wd5205) # workaround cppwinrt SDK bug https://github.com/microsoft/cppwinrt/issues/584
# if building inbox
@@ -174,15 +177,18 @@ target_compile_options(winml_test_common PRIVATE /wd5205) # workaround cppwinrt
if (onnxruntime_WINML_NAMESPACE_OVERRIDE STREQUAL "Windows")
target_compile_definitions(winml_test_common PRIVATE "BUILD_INBOX=1")
endif()
+#Abseil has a lot of C4127/C4324 warnings.
+target_compile_options(winml_test_common PRIVATE "/wd4127")
+target_compile_options(winml_test_common PRIVATE "/wd4324")
add_dependencies(winml_test_common
onnx
winml_api
winml_dll
)
-onnxruntime_add_include_to_target(winml_test_common onnx_proto gtest ${PROTOBUF_LIB} WIL::WIL safeint_interface ${GSL_TARGET})
+onnxruntime_add_include_to_target(winml_test_common onnx_proto GTest::gtest ${PROTOBUF_LIB} ${WIL_TARGET} safeint_interface ${GSL_TARGET})
onnxruntime_add_static_library(winml_google_test_lib ${WINML_TEST_SRC_DIR}/common/googletest/main.cpp)
-onnxruntime_add_include_to_target(winml_google_test_lib gtest)
+onnxruntime_add_include_to_target(winml_google_test_lib GTest::gtest)
set_winml_target_properties(winml_google_test_lib)
set_winml_target_properties(winml_test_common)
diff --git a/csharp/.clang-format b/csharp/.clang-format
index f2748cf3abccd..f440737ccfdd6 100644
--- a/csharp/.clang-format
+++ b/csharp/.clang-format
@@ -1,15 +1,22 @@
---
# clang-format settings for the C# code
-BasedOnStyle: Microsoft
+BasedOnStyle: Microsoft
+
+# Setting ColumnLimit to 0 so developer choices about where to break lines are maintained.
+# Developers are responsible for adhering to the 120 character maximum.
+ColumnLimit: 0
BreakBeforeBraces: Custom
BraceWrapping:
- AfterCaseLabel: true
- BeforeWhile: true
+ AfterCaseLabel: true
+ BeforeWhile: true
SplitEmptyFunction: false
SplitEmptyRecord: false
- # unfortunately there's no config option for handling the 'get' or 'set' of properties
+ # unfortunately there's no config option for handling the 'get' or 'set' of properties
IndentCaseLabels: true
KeepEmptyLinesAtTheStartOfBlocks: false
-SpacesInContainerLiterals: false
\ No newline at end of file
+NamespaceIndentation: All
+SpacesInContainerLiterals: false
+SortIncludes: CaseSensitive
+SortUsingDeclarations: LexicographicNumeric
diff --git a/csharp/ApiDocs/ApiDocs.csproj b/csharp/ApiDocs/ApiDocs.csproj
index a56e6050222d6..994e57913cf47 100644
--- a/csharp/ApiDocs/ApiDocs.csproj
+++ b/csharp/ApiDocs/ApiDocs.csproj
@@ -1,8 +1,9 @@
- net5.0
+ net6.0
enable
+ true
diff --git a/csharp/ApiDocs/docfx.json b/csharp/ApiDocs/docfx.json
index 9f458497e5933..c3ed49b563019 100644
--- a/csharp/ApiDocs/docfx.json
+++ b/csharp/ApiDocs/docfx.json
@@ -4,14 +4,22 @@
"src": [
{
"files": [
- "src/Microsoft.ML.OnnxRuntime/**.cs"
+ "src/Microsoft.ML.OnnxRuntime/Microsoft.ML.OnnxRuntime.csproj"
],
"src": "../"
}
],
"dest": "api",
"disableGitFeatures": false,
- "disableDefaultFilter": false
+ "disableDefaultFilter": false,
+ "properties": {
+ "AllowUnsafeBlocks": true,
+ "TargetFramework" : "net6.0",
+ "Nullable" : "enable",
+ "LangVersion": "8.0",
+ "EnableTrainingApis": "true"
+ },
+ "filter": "filterConfig.yml"
}
],
"build": {
diff --git a/csharp/ApiDocs/filterConfig.yml b/csharp/ApiDocs/filterConfig.yml
new file mode 100644
index 0000000000000..33a12e7262b16
--- /dev/null
+++ b/csharp/ApiDocs/filterConfig.yml
@@ -0,0 +1,5 @@
+apiRules:
+- exclude:
+ uidRegex: ^Microsoft\.ML\.OnnxRuntime\.OrtApi
+- exclude:
+ uidRegex: ^Microsoft\.ML\.OnnxRuntime\.OrtTrainingApi
diff --git a/csharp/Nuget.CSharp.config b/csharp/NuGet.CSharp.config
similarity index 100%
rename from csharp/Nuget.CSharp.config
rename to csharp/NuGet.CSharp.config
diff --git a/csharp/OnnxRuntime.CSharp.proj b/csharp/OnnxRuntime.CSharp.proj
index 5473246e8be3f..0288d752d8749 100644
--- a/csharp/OnnxRuntime.CSharp.proj
+++ b/csharp/OnnxRuntime.CSharp.proj
@@ -16,6 +16,7 @@ CMake creates a target to this project
nuget
x64
false
+
false
None
@@ -77,6 +78,7 @@ CMake creates a target to this project
$([System.DateTime]::UtcNow.ToString(yyyyMMdd))
$([System.DateTime]::UtcNow.ToString(hhmm))
@(MajorVersionNumber)
+ $(PackageVersion)$(ReleaseVersionSuffix)
$(PackageVersion)
$(PackageVersion)-dev-$(CurrentDate)-$(CurrentTime)-$(GitCommitHashShort)
diff --git a/csharp/readme.md b/csharp/readme.md
index ad71348b2993b..74572b5f17505 100644
--- a/csharp/readme.md
+++ b/csharp/readme.md
@@ -2,7 +2,7 @@
The solution files here are used to produce nuget packages for the C# bindings.
-Note that the project naming is currently confusing and needs updating.
+Note that the project naming is currently confusing and needs updating.
- The Microsoft.ML.OnnxRuntime project produces the Microsoft.ML.OnnxRuntime.**Managed** nuget package.
- The Microsoft.ML.OnnxRuntime nuget package contains the native (i.e. C++) code for various platforms.
@@ -10,17 +10,19 @@ Note that the project naming is currently confusing and needs updating.
## Solution files
The main solution file is OnnxRuntime.CSharp.sln. This includes desktop and Xamarin mobile projects.
-OnnxRuntime.DesktopOnly.CSharp.sln is a copy of that with all the mobile projects removed. This is
-due to there being no way to selectively exclude a csproj from the sln if Xamarin isn't available.
+OnnxRuntime.DesktopOnly.CSharp.sln is a copy of that with all the mobile projects removed. This is
+due to there being no way to selectively exclude a csproj from the sln if Xamarin isn't available.
If changes are required, either update the main solution first and copy the relevant changes across,
-or copy the entire file and remove the mobile projects (anything with iOS, Android or Droid in the name).
+or copy the entire file and remove the mobile projects (anything with iOS, Android or Droid in the name).
## Development setup:
### Requirements:
-NOTE: The usage of this solution is primarily for ORT developers creating the managed Microsoft.ML.OnnxRuntime.Managed
+#### Windows
+
+NOTE: The usage of this solution is primarily for ORT developers creating the managed Microsoft.ML.OnnxRuntime.Managed
nuget package. Due to that, the requirements are quite specific.
Visual Studio 2022 v17.2.4 or later, with Xamarin workloads
@@ -29,24 +31,42 @@ Visual Studio 2022 v17.2.4 or later, with Xamarin workloads
- untested
There's no good way to use Visual Studio 2022 17.3 Preview in a CI, so we currently have to build pre-.net6 targets
-using VS, and .net6 targets using dotnet. We can't build them all using dotnet as the xamarin targets require msbuild.
+using VS, and .net6 targets using dotnet. We can't build them all using dotnet as the xamarin targets require msbuild.
We can't package them using dotnet as that also requires msbuild.
-Once the official VS 2022 release supports .net6 and is available in the CI we can revert to the original simple
+Once the official VS 2022 release supports .net6 and is available in the CI we can revert to the original simple
setup of building everything using msbuild.
-To test packaging locally you will also need nuget.exe.
-Download from https://www.nuget.org/downloads.
+To test packaging locally you will also need nuget.exe.
+Download from https://www.nuget.org/downloads.
Put in a folder (e.g. C:\Program Files (x86)\NuGet).
-Add that folder to your PATH.
+Add that folder to your PATH.
+
+#### Linux
+
+1. Install [.Net SDK](https://dotnet.microsoft.com/download).
+2. Install Mono.
+ ```bash
+ wget http://download.mono-project.com/repo/xamarin.gpg && sudo apt-key add xamarin.gpg && rm xamarin.gpg
+ echo "deb https://download.mono-project.com/repo/ubuntu stable-bionic main" | sudo tee /etc/apt/sources.list.d/mono-official-stable.list
+ sudo apt update -y && sudo apt install -y mono-devel
+ ```
+3. Install `nuget.exe`
+ ```bash
+ wget https://dist.nuget.org/win-x86-commandline/latest/nuget.exe && sudo mv nuget.exe /usr/local/bin/nuget.exe
+ echo 'mono /usr/local/bin/nuget.exe $@' | sudo tee /usr/local/bin/nuget
+ chmod a+x /usr/local/bin/nuget
+ ```
### Magic incantations to build the nuget managed package locally:
+#### Windows
+
If we're starting with VS 2022 17.2.4 we should have dotnet sdk 6.0.301
Make sure all the required workloads are installed
`dotnet workload install android ios maccatalyst macos`
- - original example from [here](https://github.com/Sweekriti91/maui-samples/blob/swsat/devops/6.0/Apps/WeatherTwentyOne/devops/AzureDevOps/azdo_windows.yml):
+ - original example from [here](https://github.com/Sweekriti91/maui-samples/blob/swsat/devops/6.0/Apps/WeatherTwentyOne/devops/AzureDevOps/azdo_windows.yml):
- `dotnet workload install android ios maccatalyst macos maui --source https://aka.ms/dotnet6/nuget/index.json --source https://api.nuget.org/v3/index.json`
- don't need 'maui' in this list until we update the sample/test apps
- didn't seem to need --source arg/s for local build. YMMV.
@@ -55,12 +75,12 @@ Build pre-net6 targets
`msbuild -t:restore .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj -p:SelectedTargets=PreNet6`
`msbuild -t:build .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj -p:SelectedTargets=PreNet6`
- Need to run msbuild twice - once to restore which creates some json configs that are needed like
+ Need to run msbuild twice - once to restore which creates some json configs that are needed like
Microsoft.ML.OnnxRuntime\obj\project.assets.json, and once to build using the configs.
Build net6 targets
`dotnet build .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj -p:SelectedTargets=Net6`
-
+
The dotnet build does the restore internally.
Create project.assets.json in obj dir with all targets so the nuget package creation includes them all
@@ -68,3 +88,32 @@ Create project.assets.json in obj dir with all targets so the nuget package crea
Create nuget package
`msbuild .\OnnxRuntime.CSharp.proj -t:CreatePackage -p:OrtPackageId=Microsoft.ML.OnnxRuntime -p:Configuration=Debug -p:Platform="Any CPU"`
+
+#### Linux
+
+For example, to build a CUDA GPU package, just run:
+```bash
+./build.sh \
+ --config="Release" \
+ --cmake_generator Ninja \
+ --use_cuda \
+ --cuda_home=/usr/local/cuda \
+ --cudnn_home=/usr \
+ --build_nuget \
+ --msbuild_extra_options \
+ /p:SelectedTargets=Net6 \
+ /p:Net6Targets=net6.0 \
+ /p:TargetFrameworks=netstandard2.0 \
+ /p:IsLinuxBuild=true
+```
+**Note**: to build a pure CPU development package, you need to add `/p:OrtPackageId="Microsoft.ML.OnnxRuntime"`
+to `--msbuild_extra_options`. Otherwise, it will try to create Xamarin mobile targets which may not be properly configured on your devbox.
+
+A `.nupkg` file will be produced at your build root, say, `build/Release`.
+
+To consume the package, in your .net project,
+```bash
+nuget add -Source ./packages/
+dotnet add package microsoft.ml.onnxruntime.managed -s ./packages --prerelease
+dotnet add package microsoft.ml.onnxruntime.gpu -s ./packages --prerelease
+```
diff --git a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.Android/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.Android.csproj b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.Android/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.Android.csproj
index ec9e60710fedd..5fa0349e0f9a3 100644
--- a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.Android/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.Android.csproj
+++ b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.Android/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.Android.csproj
@@ -100,7 +100,7 @@
-
+
libs\arm64-v8a\libonnxruntime.so
@@ -115,4 +115,4 @@
-
\ No newline at end of file
+
diff --git a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.iOS/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.iOS.csproj b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.iOS/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.iOS.csproj
index ce261096c1a00..1b50a2842f242 100644
--- a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.iOS/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.iOS.csproj
+++ b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.iOS/Microsoft.ML.OnnxRuntime.InferenceSample.Forms.iOS.csproj
@@ -147,7 +147,7 @@
-
+
Framework
True
@@ -160,4 +160,4 @@
-
\ No newline at end of file
+
diff --git a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.NetCoreApp/Properties/launchSettings.json b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.NetCoreApp/Properties/launchSettings.json
new file mode 100644
index 0000000000000..33504c948ad25
--- /dev/null
+++ b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.NetCoreApp/Properties/launchSettings.json
@@ -0,0 +1,8 @@
+{
+ "profiles": {
+ "WSL": {
+ "commandName": "WSL2",
+ "distributionName": ""
+ }
+ }
+}
\ No newline at end of file
diff --git a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample/InferenceSampleApi.cs b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample/InferenceSampleApi.cs
index f48f9a9e3816e..05baa872a0ece 100644
--- a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample/InferenceSampleApi.cs
+++ b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample/InferenceSampleApi.cs
@@ -1,7 +1,8 @@
-using System;
+using Microsoft.ML.OnnxRuntime.Tensors;
+using System;
using System.Collections.Generic;
+using System.Diagnostics;
using System.IO;
-using Microsoft.ML.OnnxRuntime.Tensors;
namespace Microsoft.ML.OnnxRuntime.InferenceSample
{
@@ -9,10 +10,10 @@ public class InferenceSampleApi : IDisposable
{
public InferenceSampleApi()
{
- model = LoadModelFromEmbeddedResource("TestData.squeezenet.onnx");
+ _model = LoadModelFromEmbeddedResource("TestData.squeezenet.onnx");
// this is the data for only one input tensor for this model
- var inputTensor = LoadTensorFromEmbeddedResource("TestData.bench.in");
+ var inputData = LoadTensorFromEmbeddedResource("TestData.bench.in");
// create default session with default session options
// Creating an InferenceSession and loading the model is an expensive operation, so generally you would
@@ -20,13 +21,21 @@ public InferenceSampleApi()
CreateInferenceSession();
// setup sample input data
- inputData = new List();
- var inputMeta = inferenceSession.InputMetadata;
+ var inputMeta = _inferenceSession.InputMetadata;
+ _inputData = new List(inputMeta.Count);
+ _orderedInputNames = new List(inputMeta.Count);
+
foreach (var name in inputMeta.Keys)
{
- // note: DenseTensor takes a copy of the provided data
- var tensor = new DenseTensor(inputTensor, inputMeta[name].Dimensions);
- inputData.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
+ // We create an OrtValue in this case over the buffer of potentially different shapes.
+ // It is Okay as long as the specified shape does not exceed the actual length of the buffer
+ var shape = Array.ConvertAll(inputMeta[name].Dimensions, Convert.ToInt64);
+ Debug.Assert(ShapeUtils.GetSizeForShape(shape) <= inputData.LongLength);
+
+ var ortValue = OrtValue.CreateTensorValueFromMemory(inputData, shape);
+ _inputData.Add(ortValue);
+
+ _orderedInputNames.Add(name);
}
}
@@ -40,30 +49,47 @@ public void CreateInferenceSession(SessionOptions options = null)
options = new SessionOptions { LogId = "Sample" };
}
- inferenceSession = new InferenceSession(model, options);
+ _inferenceSession = new InferenceSession(_model, options);
}
public void Execute()
{
// Run the inference
- // 'results' is an IDisposableReadOnlyCollection container
- using (var results = inferenceSession.Run(inputData))
+ // 'results' is an IDisposableReadOnlyCollection container
+ using (var results = _inferenceSession.Run(null, _orderedInputNames, _inputData, _inferenceSession.OutputNames))
{
// dump the results
- foreach (var r in results)
+ for (int i = 0; i < results.Count; ++i)
{
- Console.WriteLine("Output for {0}", r.Name);
- Console.WriteLine(r.AsTensor().GetArrayString());
+ var name = _inferenceSession.OutputNames[i];
+ Console.WriteLine("Output for {0}", name);
+ // We can now access the native buffer directly from the OrtValue, no copy is involved.
+ // Spans are structs and are stack allocated. They do not add any GC pressure.
+ ReadOnlySpan span = results[i].GetTensorDataAsSpan();
+ Console.Write($"Input {i} results:");
+ for(int k = 0; k < span.Length; ++k)
+ {
+ Console.Write($" {span[k]}");
+ }
+ Console.WriteLine();
}
}
}
protected virtual void Dispose(bool disposing)
{
- if (disposing && inferenceSession != null)
+ if (disposing && !_disposed)
{
- inferenceSession.Dispose();
- inferenceSession = null;
+ _inferenceSession?.Dispose();
+ _inferenceSession = null;
+
+ if (_inputData != null)
+ foreach(var v in _inputData)
+ {
+ v?.Dispose();
+ }
+
+ _disposed = true;
}
}
@@ -110,8 +136,10 @@ static byte[] LoadModelFromEmbeddedResource(string path)
return model;
}
- private readonly byte[] model;
- private readonly List inputData;
- private InferenceSession inferenceSession;
+ private bool _disposed = false;
+ private readonly byte[] _model;
+ private readonly List _orderedInputNames;
+ private readonly List _inputData;
+ private InferenceSession _inferenceSession;
}
}
diff --git a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.csproj b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.csproj
index a59bf5bdff45c..0efb8cc7a3a23 100644
--- a/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.csproj
+++ b/csharp/sample/InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample/Microsoft.ML.OnnxRuntime.InferenceSample.csproj
@@ -2,6 +2,7 @@
netstandard2.0
+ True
diff --git a/csharp/src/Microsoft.AI.MachineLearning/Microsoft.AI.MachineLearning.targets b/csharp/src/Microsoft.AI.MachineLearning/Microsoft.AI.MachineLearning.targets
index 2f100fd75a28a..c1bddfa1adf47 100644
--- a/csharp/src/Microsoft.AI.MachineLearning/Microsoft.AI.MachineLearning.targets
+++ b/csharp/src/Microsoft.AI.MachineLearning/Microsoft.AI.MachineLearning.targets
@@ -32,7 +32,7 @@
-
+
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.shared.cs b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.shared.cs
index 34e71074d9d9d..6d69f58d20413 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.shared.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.shared.cs
@@ -4,6 +4,8 @@
using Microsoft.ML.OnnxRuntime.Tensors;
using System;
using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
namespace Microsoft.ML.OnnxRuntime
{
@@ -11,7 +13,7 @@ namespace Microsoft.ML.OnnxRuntime
/// Return immutable collection of results
///
///
- public interface IDisposableReadOnlyCollection : IReadOnlyCollection, IDisposable
+ public interface IDisposableReadOnlyCollection : IReadOnlyCollection, IReadOnlyList, IDisposable
{
}
@@ -19,13 +21,21 @@ public interface IDisposableReadOnlyCollection : IReadOnlyCollection, IDis
internal class DisposableList : List, IDisposableReadOnlyCollection
where T : IDisposable
{
+ private bool _disposed;
public DisposableList() { }
public DisposableList(int count) : base(count) { }
+ public DisposableList(IEnumerable collection) : base(collection) { }
+
#region IDisposable Support
protected virtual void Dispose(bool disposing)
{
+ if (_disposed)
+ {
+ return;
+ }
+
if (disposing)
{
// Dispose in the reverse order.
@@ -39,6 +49,7 @@ protected virtual void Dispose(bool disposing)
this[i]?.Dispose();
}
this.Clear();
+ _disposed = true;
}
}
@@ -53,6 +64,9 @@ public void Dispose()
}
///
+ /// This is a legacy class that is kept for backward compatibility.
+ /// Use OrtValue based API.
+ ///
/// This class serves as a container for model run output values including
/// tensors, sequences of tensors, sequences and maps.
/// The class must be disposed of.
@@ -63,7 +77,7 @@ public void Dispose()
public class DisposableNamedOnnxValue : NamedOnnxValue, IDisposable
{
private IOrtValueOwner _ortValueHolder;
- private bool _disposed = false;
+ private bool _disposed;
///
/// Ctor
@@ -119,179 +133,181 @@ private DisposableNamedOnnxValue(string name, Object value, MapHelper mapHelper,
public TensorElementType ElementType { get; }
///
- /// Overrides the base class method. Since the instance already owns underlying OrtValue handle,
- /// it returns an instance of OrtValue that does not own the raw handle
- /// that to the output onnxValue. With respect to pinnedMemoryHandle, it has no operation
+ /// Overrides the base class method. With respect to pinnedMemoryHandle, it has no operation
/// to do, as this class maintains a native buffer via _ortValueHolder and the memory will be
/// disposed by it. This is the case when we are dealing with an OrtValue that is backed by native memory
/// and not by pinned managed memory.
///
/// This class is generally used for outputs to be created on top of the output OrtValue,
- /// but the interface (derived from NamedOnnxValue) allows it to be passed as input and one of the test
+ /// but the interface (derived from NamedOnnxValue) allows it to be passed as output and one of the test
/// cases does it. Unless we deprecate and re-do the interface, we must support it.
///
/// always set to null
- /// An instance of OrtValue that does not own underlying memory
- internal override OrtValue InputToOrtValue(NodeMetadata metadata, out IDisposable memoryHolder)
+ /// Native OrtValue handle
+ internal override IntPtr InputToOrtValueHandle(NodeMetadata metadata, out IDisposable memoryHolder)
{
if (_ortValueHolder == null)
{
- throw new InvalidOperationException("The instance of this class does not own any OrtValues");
+ throw new InvalidOperationException("The instance of this class does not own an OrtValue");
}
// PinnedMemoryHandle holds the default value as DisposableNamedOnnxValue
// doesn't hold any managed buffer (that needs to be pinned)
memoryHolder = null;
// Return non-owning instance of OrtValue
- return _ortValueHolder.Value;
+ return _ortValueHolder.Value.Handle;
}
///
/// Generally, this class is created on top of the values that are returned by the model run.
- /// So, this method is not expected to be called. However, if it is called (an instance fed as output),
- /// it will return the OrtValue that was previously created, since the caller must understand what they are doing.
+ /// However, there is a test case that uses this value for output
+ /// It will return the OrtValue that was previously created, since the caller must understand what they are doing.
///
///
///
///
- internal override OrtValue OutputToOrtValue(NodeMetadata metadata, out IDisposable memoryOwner)
+ internal override IntPtr OutputToOrtValueHandle(NodeMetadata metadata, out IDisposable memoryOwner)
{
- return InputToOrtValue(metadata, out memoryOwner);
+ return InputToOrtValueHandle(metadata, out memoryOwner);
}
- internal static DisposableNamedOnnxValue CreateFromOrtValue(string name, OrtValue ortValue)
+ ///
+ /// This function takes ortValue and constructs an instance of DisposableNamedOnnxValue.
+ /// The new instance takes ownership of the OrtValue and will dispose of it when it is disposed of.
+ ///
+ ///
+ /// becomes null on success.
+ /// an instance of DisposableNamedOnnxValue
+ internal static DisposableNamedOnnxValue CreateFromOrtValue(string name, ref OrtValue ortValue)
{
- return CreateFromOrtValue(name, ortValue, OrtAllocator.DefaultInstance);
+ return CreateFromOrtValue(name, ref ortValue, OrtAllocator.DefaultInstance);
}
- internal static DisposableNamedOnnxValue CreateFromOrtValue(string name, OrtValue ortValue, OrtAllocator allocator)
+ ///
+ /// This function takes ortValue and constructs an instance of DisposableNamedOnnxValue.
+ /// The new instance takes ownership of the OrtValue and will dispose of it when it is disposed of.
+ ///
+ ///
+ /// becomes null on success.
+ ///
+ /// an instance of DisposableNamedOnnxValue
+ ///
+ internal static DisposableNamedOnnxValue CreateFromOrtValue(string name, ref OrtValue ortValue, OrtAllocator allocator)
{
- DisposableNamedOnnxValue result = null;
+ DisposableNamedOnnxValue result;
- IntPtr valueType;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValueType(ortValue.Handle, out valueType));
- OnnxValueType onnxValueType = (OnnxValueType)valueType;
+ var onnxValueType = ortValue.OnnxType;
switch (onnxValueType)
{
case OnnxValueType.ONNX_TYPE_TENSOR:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case OnnxValueType.ONNX_TYPE_SEQUENCE:
- result = FromNativeSequence(name, ortValue, allocator);
+ result = FromNativeSequence(name, ref ortValue, allocator);
break;
case OnnxValueType.ONNX_TYPE_MAP:
- result = FromNativeMap(name, ortValue, allocator);
+ result = FromNativeMap(name, ref ortValue, allocator);
break;
default:
- throw new NotSupportedException("OnnxValueType : " + onnxValueType + " is not supported");
+ throw new NotSupportedException($"OnnxValueType : {onnxValueType} is not supported");
}
return result;
}
///
- /// Creates an instance of DisposableNamedOnnxValue and takes ownership of ortValueElement
+ /// Creates an instance of DisposableNamedOnnxValue and takes ownership of ortValue.
/// on success.
///
/// name of the value
- /// underlying OrtValue
+ /// Underlying OrtValue. This becomes null on successful return.
///
- private static DisposableNamedOnnxValue FromNativeTensor(string name, OrtValue ortValue)
+ private static DisposableNamedOnnxValue FromNativeTensor(string name, ref OrtValue ortValue)
{
- DisposableNamedOnnxValue result = null;
-
- /* Get Tensor element type */ //TODO: Assumed value is Tensor, need to support non-tensor types in future
- IntPtr typeAndShape = IntPtr.Zero;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(ortValue.Handle, out typeAndShape));
- TensorElementType elemType = TensorElementType.DataTypeMax;
- try
- {
- IntPtr el_type;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, out el_type));
- elemType = (TensorElementType)el_type;
- }
- finally
- {
- NativeMethods.OrtReleaseTensorTypeAndShapeInfo(typeAndShape);
- }
+ DisposableNamedOnnxValue result;
- switch (elemType)
+ var typeShape = ortValue.GetTensorTypeAndShape();
+ switch (typeShape.ElementDataType)
{
case TensorElementType.Float:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.Double:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.Int16:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.UInt16:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.Int32:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.UInt32:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.Int64:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.UInt64:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.UInt8:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.Int8:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.String:
- result = FromNativeTensor(name, ortValue);
+ {
+ var shape = Array.ConvertAll(typeShape.Shape, Convert.ToInt32);
+ result = FromNativeStringTensor(name, shape, ref ortValue);
+ }
break;
case TensorElementType.Bool:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.Float16:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
case TensorElementType.BFloat16:
- result = FromNativeTensor(name, ortValue);
+ result = FromNativeTensor(name, ref ortValue);
break;
default:
- throw new NotSupportedException("Tensor of element type: " + elemType + " is not supported");
+ throw new NotSupportedException($"Tensor of element type: {typeShape.ElementDataType} is not supported");
}
return result;
}
+ private static DisposableNamedOnnxValue FromNativeStringTensor(string name, int[] shape, ref OrtValue ortValue)
+ {
+ var dt = new DenseTensor(ortValue.GetStringTensorAsArray(), shape);
+ // still need to hold on to ortValue in case we need this for input handles
+ var result = new DisposableNamedOnnxValue(name, dt, TensorElementType.String, ortValue);
+ ortValue = null;
+ return result;
+ }
+
+
///
/// This method creates an instance of DisposableNamedOnnxValue that has possession of ortValueElement
- /// native memory Tensor and returns it to the caller. The original ortValueElement argument looses
- /// ownership of the native ortValueElement handle, however, the caller is still responsible for disposing them
- /// on exception. Disposing of OrtValue that has no ownership is a no-op and fine.
+ /// native memory Tensor and returns it to the caller.
///
/// data type
/// name of the output
- /// native tensor
+ /// native tensor. Becomes null on successful return.
/// DisposableNamedOnnxValue instance
- private static DisposableNamedOnnxValue FromNativeTensor(string name, OrtValue ortValue)
+ private static DisposableNamedOnnxValue FromNativeTensor(string name, ref OrtValue ortValue)
{
- var ortValueTensor = new OrtValueTensor(ortValue);
+ Debug.Assert(typeof(T) != typeof(string), "Use FromNativeStringTensor for strings");
+ var ortValueTensor = new OrtValueTensor(ref ortValue);
try
{
- if (typeof(T) == typeof(string))
- {
- var dt = new DenseTensor(ortValueTensor.GetBytesAsStringMemory(), ortValueTensor.Dimensions);
- return new DisposableNamedOnnxValue(name, dt, ortValueTensor.ElementType, ortValueTensor);
- }
- else
- {
- DenseTensor dt = new DenseTensor(ortValueTensor.Memory, ortValueTensor.Dimensions);
- return new DisposableNamedOnnxValue(name, dt, ortValueTensor.ElementType, ortValueTensor);
- }
+ var dt = new DenseTensor(ortValueTensor.Memory, ortValueTensor.Dimensions);
+ return new DisposableNamedOnnxValue(name, dt, ortValueTensor.ElementType, ortValueTensor);
}
catch (Exception)
{
@@ -308,231 +324,216 @@ private static DisposableNamedOnnxValue FromNativeTensor(string name, OrtValu
/// ortValueElement that has native sequence
/// used allocator
/// DisposableNamedOnnxValue
- private static DisposableNamedOnnxValue FromNativeSequence(string name, OrtValue ortValueSequence, OrtAllocator allocator)
+ private static DisposableNamedOnnxValue FromNativeSequence(string name, ref OrtValue ortValueSequence, OrtAllocator allocator)
{
- DisposableNamedOnnxValue result = null;
- IntPtr count;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValueCount(ortValueSequence.Handle, out count));
- var sequence = new DisposableList(count.ToInt32());
+ var valueCount = ortValueSequence.GetValueCount();
+ var sequence = new DisposableList(valueCount);
try
{
- for (int i = 0; i < count.ToInt32(); i++)
+ for (int i = 0; i < valueCount; i++)
{
- IntPtr nativeOnnxValueSeq;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(ortValueSequence.Handle, i, allocator.Pointer, out nativeOnnxValueSeq));
- using (var ortValueElement = new OrtValue(nativeOnnxValueSeq))
+ var ortValueElement = ortValueSequence.GetValue(i, allocator);
+ try
{
// Will take ownership or throw
- sequence.Add(CreateFromOrtValue(string.Empty, ortValueElement, allocator));
+ sequence.Add(CreateFromOrtValue(string.Empty, ref ortValueElement, allocator));
+ }
+ finally
+ {
+ ortValueElement?.Dispose();
}
}
// NativeOrtValueCollectionOwner will take ownership of ortValueSequence and will make sure sequence
// is also disposed.
- var nativeCollectionManager = new NativeOrtValueCollectionOwner(ortValueSequence, sequence);
- result = new DisposableNamedOnnxValue(name, sequence, OnnxValueType.ONNX_TYPE_SEQUENCE, nativeCollectionManager);
+ var nativeCollectionManager = new NativeOrtValueCollectionOwner(ref ortValueSequence, sequence);
+ return new DisposableNamedOnnxValue(name, sequence, OnnxValueType.ONNX_TYPE_SEQUENCE, nativeCollectionManager);
}
catch (Exception)
{
sequence.Dispose();
throw;
}
- return result;
}
///
/// Will extract keys and values from the map and create a DisposableNamedOnnxValue from it
///
/// name of the output
- /// ortValue that represents a map.
- /// This function does not take ownership of the map as it we copy all keys an values into a dictionary. We let the caller dispose of it
+ /// ortValue that represents a map. Becomes null on success
///
/// DisposableNamedOnnxValue
- private static DisposableNamedOnnxValue FromNativeMap(string name, OrtValue ortValueMap, OrtAllocator allocator)
+ private static DisposableNamedOnnxValue FromNativeMap(string name, ref OrtValue ortValueMap, OrtAllocator allocator)
{
DisposableNamedOnnxValue result = null;
- // Map processing is currently not recursing. It is assumed to contain
+ // Map processing is not recursive. It is assumed to contain
// only primitive types and strings tensors. No sequences or maps.
// The data is being copied to a dictionary and all ortValues are being disposed.
// not mapped for client consumption.
- using (var cleanUpList = new DisposableList())
+
+ // Keys in element 0, values in element 1
+ Span valSpan = new OrtValue[2];
+ var disposer = new DisposableArray(valSpan);
+ try
{
- IntPtr nativeOnnxValueMapKeys = IntPtr.Zero;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(ortValueMap.Handle, 0, allocator.Pointer, out nativeOnnxValueMapKeys));
- var ortValueKeys = new OrtValue(nativeOnnxValueMapKeys);
- cleanUpList.Add(ortValueKeys);
-
- var typeAndShape = IntPtr.Zero;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(nativeOnnxValueMapKeys, out typeAndShape));
- TensorElementType keyElemType;
- try
- {
- IntPtr el_type;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, out el_type));
- keyElemType = (TensorElementType)el_type;
- }
- finally
- {
- NativeMethods.OrtReleaseTensorTypeAndShapeInfo(typeAndShape);
- }
+ valSpan[0] = ortValueMap.GetValue(0, allocator);
+ valSpan[1] = ortValueMap.GetValue(1, allocator);
- IntPtr nativeOnnxValueMapValues = IntPtr.Zero;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(ortValueMap.Handle, 1, allocator.Pointer, out nativeOnnxValueMapValues));
- var ortValueValues = new OrtValue(nativeOnnxValueMapValues);
- cleanUpList.Add(ortValueValues);
+ var keysTypeShape = valSpan[0].GetTensorTypeAndShape();
+ var valsTypeInfo = valSpan[1].GetTensorTypeAndShape();
- typeAndShape = IntPtr.Zero;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(nativeOnnxValueMapValues, out typeAndShape));
- TensorElementType valueElemType;
- try
- {
- IntPtr el_type;
- NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, out el_type));
- valueElemType = (TensorElementType)el_type;
- }
- finally
- {
- NativeMethods.OrtReleaseTensorTypeAndShapeInfo(typeAndShape);
- }
+ int[] intKeyShape = Array.ConvertAll(keysTypeShape.Shape, Convert.ToInt32);
+ int[] intValsShape = Array.ConvertAll(valsTypeInfo.Shape, Convert.ToInt32);
// The supported combinations of key and value types are taken from the ORT C API.
- switch (keyElemType)
+ switch (keysTypeShape.ElementDataType)
{
case TensorElementType.Int64:
- switch (valueElemType)
+ switch (valsTypeInfo.ElementDataType)
{
case TensorElementType.Float:
- result = FromNativeMapElements(name, ortValueMap, ortValueKeys, ortValueValues);
+ result = FromNativeMapElements(name, ref ortValueMap,
+ intKeyShape, ref valSpan[0], intValsShape, ref valSpan[1]);
break;
case TensorElementType.Double:
- result = FromNativeMapElements(name, ortValueMap, ortValueKeys, ortValueValues);
+ result = FromNativeMapElements(name, ref ortValueMap,
+ intKeyShape, ref valSpan[0], intValsShape, ref valSpan[1]);
break;
case TensorElementType.Int64:
- result = FromNativeMapElements(name, ortValueMap, ortValueKeys, ortValueValues);
+ result = FromNativeMapElements(name, ref ortValueMap,
+ intKeyShape, ref valSpan[0], intValsShape, ref valSpan[1]);
break;
case TensorElementType.String:
- result = FromNativeMapElements(name, ortValueMap, ortValueKeys, ortValueValues);
+ result = FromNativeMapElements(name, ref ortValueMap,
+ intKeyShape, ref valSpan[0], intValsShape, ref valSpan[1]);
break;
default:
- break;
+ throw new NotSupportedException($"Map value type: {valsTypeInfo.ElementDataType} is not supported");
}
break;
case TensorElementType.String:
- switch (valueElemType)
+ switch (valsTypeInfo.ElementDataType)
{
case TensorElementType.Float:
- result = FromNativeMapElements(name, ortValueMap, ortValueKeys, ortValueValues);
+ result = FromNativeMapElements(name, ref ortValueMap,
+ intKeyShape, ref valSpan[0], intValsShape, ref valSpan[1]);
break;
case TensorElementType.Double:
- result = FromNativeMapElements(name, ortValueMap, ortValueKeys, ortValueValues);
+ result = FromNativeMapElements(name, ref ortValueMap,
+ intKeyShape, ref valSpan[0], intValsShape, ref valSpan[1]);
break;
case TensorElementType.Int64:
- result = FromNativeMapElements(name, ortValueMap, ortValueKeys, ortValueValues);
+ result = FromNativeMapElements(name, ref ortValueMap,
+ intKeyShape, ref valSpan[0], intValsShape, ref valSpan[1]);
break;
case TensorElementType.String:
- result = FromNativeMapElements(name, ortValueMap, ortValueKeys, ortValueValues);
+ result = FromNativeMapElements(name, ref ortValueMap,
+ intKeyShape, ref valSpan[0], intValsShape, ref valSpan[1]);
break;
default:
- break;
+ throw new NotSupportedException($"Map value type: {valsTypeInfo.ElementDataType} is not supported");
}
break;
default:
- throw new NotSupportedException("Map key type: " + keyElemType + " is not supported");
+ throw new NotSupportedException($"Map key type: {keysTypeShape.ElementDataType} is not supported");
}
}
+ finally
+ {
+ // Any values that are taken possession of
+ // will be null, others, like string tensors, will be disposed
+ disposer.Dispose();
+ }
+
return result;
}
///
- /// This method maps keys and values of the map and copies them into a Dictionary
- /// and returns as an instance of DisposableNamedOnnxValue that does not own or dispose
- /// any onnx/ortValueElement. The method takes possession of ortValueTensorKeys and ortValueTensorValues
- /// and disposes of them. The original ortValueElement looses ownership of the Tensor. The caller is still responsible
- /// for disposing these arguments. Disposing ortValueElement that does not have ownership is a no-op, however, either
- /// of the arguments may still need to be disposed on exception.
+ /// This method maps keys and values of the map and copies them into a managed Dictionary
+ /// and returns as an instance of DisposableNamedOnnxValue. The method takes possession of ortValueMap,
+ /// ortValueTensorKeys and ortValueTensorValues and disposes of them.
///
- /// key type
- /// value type
- /// name of the output parameter
- /// tensor with map keys.
- /// tensor with map values
- /// instance of DisposableNamedOnnxValue with Dictionary
- private static DisposableNamedOnnxValue FromNativeMapElements(string name, OrtValue ortValueMap,
- OrtValue ortValueTensorKeys, OrtValue ortValueTensorValues)
+ ///
+ ///
+ ///
+ /// becomes null on success return
+ /// keys shape in ints
+ /// becomes null on success
+ /// values shape in ints
+ /// becomes null on success
+ ///
+ private static DisposableNamedOnnxValue FromNativeMapElements(string name, ref OrtValue ortValueMap,
+ int[] keysShape, ref OrtValue ortValueTensorKeys,
+ int[] valsShape, ref OrtValue ortValueTensorValues)
{
- var listOfKeysValues = new DisposableList();
- var collOwner = new NativeOrtValueCollectionOwner(ortValueMap, listOfKeysValues);
- try
+ if (typeof(K) == typeof(string))
{
- var tensorKeys = new OrtValueTensor(ortValueTensorKeys);
- listOfKeysValues.Add(ortValueTensorKeys);
- var tensorValues = new OrtValueTensor(ortValueTensorValues);
- listOfKeysValues.Add(ortValueTensorValues);
+ var denseTensorKeys = new DenseTensor(ortValueTensorKeys.GetStringTensorAsArray(), keysShape);
- MapHelper mapHelper = null;
- if (typeof(K) == typeof(string))
+ if (typeof(V) == typeof(string))
{
- var denseTensorKeys = new DenseTensor(tensorKeys.GetBytesAsStringMemory(), tensorKeys.Dimensions);
-
- if (typeof(V) == typeof(string))
+ var denseTensorValues = new DenseTensor(ortValueTensorValues.GetStringTensorAsArray(), valsShape);
+ var map = Enumerable.Range(0, (int)denseTensorKeys.Length).ToDictionary(i => denseTensorKeys[i], i => denseTensorValues[i]);
+ var mapHelper = new MapHelper(denseTensorKeys, denseTensorValues);
+ var result = new DisposableNamedOnnxValue(name, map, mapHelper, ortValueMap);
+ ortValueMap = null;
+ return result;
+ }
+ else
+ {
+ var tensorValues = new OrtValueTensor(ref ortValueTensorValues);
+ try
{
- var map = new Dictionary();
- var denseTensorValues = new DenseTensor(tensorValues.GetBytesAsStringMemory(), tensorValues.Dimensions);
- for (var i = 0; i < denseTensorKeys.Length; i++)
- {
- map.Add(denseTensorKeys.GetValue(i), denseTensorValues.GetValue(i));
- }
- mapHelper = new MapHelper(denseTensorKeys, denseTensorValues);
- return new DisposableNamedOnnxValue(name, map, mapHelper, collOwner);
+ var denseTensorValues = new DenseTensor(tensorValues.Memory, tensorValues.Dimensions);
+ return FromMapDenseTensors(name, ref ortValueMap, denseTensorKeys, denseTensorValues, tensorValues);
}
- else
+ catch (Exception)
{
- var map = new Dictionary();
- var denseTensorValues = new DenseTensor(tensorValues.Memory, tensorValues.Dimensions);
- for (var i = 0; i < denseTensorKeys.Length; i++)
- {
- map.Add(denseTensorKeys.GetValue(i), denseTensorValues.GetValue(i));
- }
- mapHelper = new MapHelper(denseTensorKeys, denseTensorValues);
- return new DisposableNamedOnnxValue(name, map, mapHelper, collOwner);
+ tensorValues.Dispose();
+ throw;
}
}
- else
+ }
+ else
+ {
+ var disposer = new DisposableList(2);
+ try
{
+ var tensorKeys = new OrtValueTensor(ref ortValueTensorKeys);
+ disposer.Add(tensorKeys);
var denseTensorKeys = new DenseTensor(tensorKeys.Memory, tensorKeys.Dimensions);
if (typeof(V) == typeof(string))
{
- var map = new Dictionary();
- var denseTensorValues = new DenseTensor(tensorValues.GetBytesAsStringMemory(), tensorValues.Dimensions);
- for (var i = 0; i < denseTensorKeys.Length; i++)
- {
- map.Add(denseTensorKeys.GetValue(i), denseTensorValues.GetValue(i));
- }
- mapHelper = new MapHelper(denseTensorKeys, denseTensorValues);
- return new DisposableNamedOnnxValue(name, map, mapHelper, collOwner);
+ var denseTensorValues = new DenseTensor(ortValueTensorValues.GetStringTensorAsArray(), valsShape);
+ return FromMapDenseTensors(name, ref ortValueMap, denseTensorKeys, denseTensorValues, disposer);
}
else
{
+ var tensorValues = new OrtValueTensor(ref ortValueTensorValues);
+ disposer.Add(tensorValues);
var denseTensorValues = new DenseTensor