# Add logit softcapping to GQA #3426
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: "Windows CPU arm64 Build"

# Triggers: manual dispatch, pushes to `main` and `rel-*` branches, and
# pull requests (no branch filter is configured for pull requests).
on:
  workflow_dispatch:
  push:
    branches:
      - main
      - rel-*
  pull_request:
# One concurrency group per workflow and head ref; when `github.head_ref` is
# empty the expression falls back to the unique run id. A newer run in the
# same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Workflow-wide variables read by the job steps.
env:
  # Build output directory, relative to the workspace root.
  binaryDir: 'build/cpu/win-arm64'
  # REST query that lists the ORT-Nightly feed entries for the package below.
  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
  # NuGet package id of the ONNX Runtime nightly build.
  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
jobs:
  # Builds the project with the `windows_arm64_cpu_release` CMake preset
  # against an ONNX Runtime nightly NuGet package, then runs the C# tests,
  # the Python tests and the `unit_tests.exe` binary.
  windows-cpu-arm64-build:
    runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-win11-arm64-cpu" ]
    steps:
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Setup Visual Studio 2022
        # NOTE(review): the action reference was email-obfuscated in the
        # scraped source ("microsoft/[email protected]"). `setup-msbuild@v1.1`
        # is a reconstruction; confirm the tag against the repository.
        uses: microsoft/setup-msbuild@v1.1
        with:
          vs-version: '17.4'
          msbuild-architecture: arm64

      - uses: nuget/setup-nuget@v2
        with:
          nuget-version: '5.x'

      # Reads the first version of the first package returned by the feed
      # query, exports it as ORT_NIGHTLY_VERSION through GITHUB_ENV, and
      # installs that package version with nuget.
      - name: Download OnnxRuntime Nightly
        shell: powershell
        run: |
          $resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
          $ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
          Write-Host "$ORT_NIGHTLY_VERSION"
          "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append
          nuget install ${{ env.ORT_PACKAGE_NAME }} -version $ORT_NIGHTLY_VERSION -x -NonInteractive

      # Diagnostic listing of the installed package; a failure here does not
      # fail the job.
      - run: Get-ChildItem ${{ env.ORT_PACKAGE_NAME }} -Recurse
        continue-on-error: true

      # Moves the package headers to ort/include and the win-arm64 native
      # binaries to ort/lib.
      - name: Extract OnnxRuntime library and header files
        run: |
          mkdir ort/lib
          move ${{ env.ORT_PACKAGE_NAME }}/build/native/include ort/
          move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-arm64/native/* ort/lib/

      - name: Configure CMake
        run: |
          python -m pip install wheel
          cmake --preset windows_arm64_cpu_release

      - name: Build with CMake
        run: |
          cmake --build --preset windows_arm64_cpu_release --parallel

      - name: Build the C# API and Run the C# Tests
        run: |
          cd test\csharp
          dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib"

      # The wheel found under <binaryDir>\wheel is installed with --no-deps;
      # its Python dependencies are installed explicitly beforehand.
      - name: Install the Python Wheel and Test Dependencies
        run: |
          python -m pip install "numpy<2" coloredlogs flatbuffers packaging protobuf sympy pytest
          python -m pip install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ ort-nightly-qnn
          python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps

      - name: Run the Python Tests
        run: |
          python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"

      # Diagnostic listing of the build tree; runs even when earlier steps
      # failed and never fails the job itself.
      - name: Verify Build Artifacts
        if: always()
        continue-on-error: true
        run: |
          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse
          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir\test -Recurse

      # Copies the ONNX Runtime binaries into the Release directory, then
      # runs unit_tests.exe from there.
      - name: Run tests
        run: |
          copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\Release
          & .\$env:binaryDir\Release\unit_tests.exe