# Context: "Support constrained decoding" (#2107).

# CI workflow for the Windows x64 DirectML build.
name: "Windows DirectML x64 Build"
on:
  # Manual runs.
  workflow_dispatch:
  # Pushes to main and to any branch whose name starts with "rel-".
  push:
    branches:
      - main
      - rel-*
  # Every pull request (no branch or path filter).
  pull_request:
# A newer run in the same group cancels the one still in progress.
# github.head_ref is only set for pull_request events, so other events fall
# back to the unique run id and therefore never share a group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Workflow-level environment variables, available to every step.
env:
  # Not referenced by any step visible in this file.
  # NOTE(review): presumably read by azcopy on the runner - confirm before
  # changing or removing.
  AZCOPY_AUTO_LOGIN_TYPE: "MSI"
  AZCOPY_MSI_CLIENT_ID: "63b63039-6328-442f-954b-5a64d124e5b4"
  # Feed query used by the "Download OnnxRuntime Nightly" step to pick a version.
  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
  # NuGet package installed by that step; also the folder name it is unpacked to.
  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime.DirectML"
  # DirectML: extraction directory, downloaded archive name, download URL.
  # The version appears in all three values; keep them in sync.
  dml_dir: "Microsoft.AI.DirectML.1.15.2"
  dml_zip: "Microsoft.AI.DirectML.1.15.2.zip"
  dml_url: "https://www.nuget.org/api/v2/package/Microsoft.AI.DirectML/1.15.2"
  # D3D12 Agility SDK: same three-value layout as DirectML above.
  d3d12_dir: "Microsoft.Direct3D.D3D12.1.614.1"
  d3d12_zip: "Microsoft.Direct3D.D3D12.1.614.1.zip"
  d3d12_url: "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/1.614.1"
  # Directory the later steps read build outputs from (wheel\ and Release\).
  # NOTE(review): presumably matches the CMake preset's binary dir - confirm.
  binaryDir: "build/directml/win-x64"
jobs:
  # Single job: fetch prebuilt dependencies into ort/, build with CMake,
  # install the Python wheel and test requirements, then run unit_tests.exe.
  windows-directml-x64-build:
    runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-Win2022-GPU-A10" ]
    steps:
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v4
        with:
          submodules: true

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11.x'
          architecture: 'x64'

      # Takes the first version entry of the first package returned by the
      # feed query, exports it as ORT_NIGHTLY_VERSION through GITHUB_ENV, and
      # installs that version of ORT_PACKAGE_NAME. `-x` (ExcludeVersion) keeps
      # the version out of the output folder name, so the extract step below
      # can address the package by its bare name.
      - name: Download OnnxRuntime Nightly
        shell: pwsh
        run: |
          $resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
          $ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
          Write-Host "$ORT_NIGHTLY_VERSION"
          "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append
          nuget install ${{ env.ORT_PACKAGE_NAME }} -version $ORT_NIGHTLY_VERSION -x -NonInteractive

      - name: Download DirectML
        run: |
          Invoke-WebRequest -Uri $env:dml_url -OutFile $env:dml_zip

      - name: Download the D3D12 Agility SDK
        run: |
          Invoke-WebRequest -Uri $env:d3d12_url -OutFile $env:d3d12_zip

      # Collects the OnnxRuntime headers and win-x64 native binaries under ort/.
      - name: Extract OnnxRuntime library and header files
        run: |
          mkdir ort/lib
          move ${{ env.ORT_PACKAGE_NAME }}/build/native/include ort/
          move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-x64/native/* ort/lib/

      - name: Unzip DirectML
        run: |
          Expand-Archive $env:dml_zip -DestinationPath $env:dml_dir
          Remove-Item -Path $env:dml_zip

      - name: Unzip the D3D12 Agility SDK
        run: |
          Expand-Archive $env:d3d12_zip -DestinationPath $env:d3d12_dir
          Remove-Item -Path $env:d3d12_zip

      # Adds the DirectML / D3D12 binaries and the DirectML header to the
      # ort/ tree created by the extract step above.
      - name: Move the files to the ort directory
        run: |
          mv $env:dml_dir\bin\x64-win\DirectML.dll ort\lib
          mv $env:d3d12_dir\build\native\bin\x64\D3D12Core.dll ort\lib
          mv $env:dml_dir\include\DirectML.h ort\include

      # Installs the pinned 1.82.0 toolchain and appends the cargo bin
      # directory to GITHUB_PATH so that later steps can find it.
      - name: Install Rust Toolchain
        run: |
          $exePath = "$env:TEMP\rustup-init.exe"
          (New-Object Net.WebClient).DownloadFile('https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe', $exePath)
          & $exePath -y --default-toolchain=1.82.0
          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"

      - name: Configure CMake
        run: |
          cmake --preset windows_x64_directml_release -DTEST_PHI2=False

      - name: Build with CMake
        run: |
          cmake --build --preset windows_x64_directml_release --parallel

      - name: Install the Python Wheel and Test Dependencies
        run: |
          # A PowerShell step is judged by the exit code of its LAST native
          # command only, so a failed requirements install would otherwise be
          # hidden by a later successful pip call. Check each one explicitly.
          $requirementFiles = @(
            'test\python\requirements.txt',
            'test\python\directml\torch\requirements.txt',
            'test\python\directml\ort\requirements.txt'
          )
          foreach ($file in $requirementFiles) {
            python -m pip install -r $file
            if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          }
          python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps

      # Diagnostic listing only: runs even after an earlier failure and never
      # fails the job itself.
      - name: Verify Build Artifacts
        if: always()
        continue-on-error: true
        run: |
          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse

      # Copies the contents of ort\lib next to unit_tests.exe before running it.
      - name: Run tests
        run: |
          copy $env:GITHUB_WORKSPACE\ort\lib\* $env:GITHUB_WORKSPACE\$env:binaryDir\Release
          & .\$env:binaryDir\Release\unit_tests.exe