Add GroupQueryAttention on CPU in model builder (#420) #1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: builds onnxruntime-genai against CUDA on a self-hosted Windows
# GPU runner, then runs the C#, Python and native unit tests on that build.
name: "Windows CUDA x64 Build"

on:
  workflow_dispatch:
  push:
    branches:
      - main
      - rel-*
  pull_request:

# One run per PR branch (or per run id for pushes); a newer run cancels the
# one still in progress for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

env:
  AZCOPY_AUTO_LOGIN_TYPE: MSI
  AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
  # Single-quoted so backslashes are literal: no escape processing, and both
  # paths consistently use a single separator.
  cuda_dir: '${{ github.workspace }}\cuda_sdk'
  cuda_version: "11.8"
  CUDA_PATH: '${{ github.workspace }}\cuda_sdk\v11.8'
  binaryDir: 'build/cuda'
  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime.Gpu.Windows&api-version=6.0-preview.1"
  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime.Gpu.Windows"

jobs:
  windows-cuda-x64-build:
    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Win2022-GPU-A10"]
    steps:
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v4
        with:
          submodules: true

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11.x'
          architecture: 'x64'

      # Copies the CUDA SDK for env.cuda_version from blob storage into
      # env.cuda_dir (azcopy authenticates through the AZCOPY_* variables).
      - name: Download cuda
        run: |
          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ env.cuda_version }}" ${{ env.cuda_dir}}

      - uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '6.0.x'

      - name: Install jq and curl
        run: |
          choco install -y jq curl

      # Reads the newest version from the ORT-Nightly feed and exports it as
      # ORT_NIGHTLY_VERSION for later steps. `jq -r` prints the text "null"
      # when the field is missing, so an empty or "null" result fails the step
      # here instead of surfacing later as a confusing nuget error.
      - name: Get the Latest OnnxRuntime Nightly Version
        shell: pwsh
        run: |
          $ORT_NIGHTLY_VERSION=$(curl -s "${{ env.ORT_NIGHTLY_REST_API }}" | jq -r '.value[0].versions[0].normalizedVersion')
          if ([string]::IsNullOrWhiteSpace($ORT_NIGHTLY_VERSION) -or $ORT_NIGHTLY_VERSION -eq 'null') {
            throw "Could not determine the latest OnnxRuntime nightly version from the feed"
          }
          echo "$ORT_NIGHTLY_VERSION"
          "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append

      - name: Download OnnxRuntime Nightly
        run: |
          nuget install ${{ env.ORT_PACKAGE_NAME }} -version ${{ env.ORT_NIGHTLY_VERSION }} -ExcludeVersion -NonInteractive

      # Diagnostic listing of the downloaded package; never fails the job.
      - run: Get-ChildItem ${{ env.ORT_PACKAGE_NAME }} -Recurse
        continue-on-error: true

      # Moves the package's headers and win-x64 native binaries into ort/.
      - name: Extract OnnxRuntime library and header files
        run: |
          mkdir ort/lib
          move ${{ env.ORT_PACKAGE_NAME }}/buildTransitive/native/include ort/
          move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-x64/native/* ort/lib/

      # NOTE(review): the doubled backslashes in the run scripts below are
      # passed through literally (block scalars do no escape processing).
      # They are kept as-is; confirm whether the toolset path relies on them.
      - name: Configure CMake
        run: |
          cmake --preset windows_x64_cuda_release -T cuda=${{ env.cuda_dir }}\\v${{ env.cuda_version }} -DTEST_PHI2=False

      - name: Build with CMake
        run: |
          cmake --build --preset windows_x64_cuda_release --parallel

      # Appends the CUDA bin directory to GITHUB_PATH so that subsequent
      # steps see it on PATH.
      - name: Add CUDA to PATH
        run: |
          echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

      - name: Build the C# API and Run the C# Tests
        run: |
          cd test\csharp
          dotnet test /p:Configuration=release /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release"

      # NOTE(review): this CUDA job installs requirements-nightly-cpu.txt;
      # confirm that the CPU requirements file is the intended one.
      - name: Install the Python Wheel and Test Dependencies
        run: |
          python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl"))
          python -m pip install -r test\python\requirements-nightly-cpu.txt

      # `--output tsv` makes az print the raw secret. With the default JSON
      # output the value is wrapped in double quotes, and those quotes would
      # become part of both the log mask and the exported HF_TOKEN.
      - name: Get HuggingFace Token
        run: |
          az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
          $HF_TOKEN = (az keyvault secret show --vault-name anubissvcsecret --name ANUBIS-HUGGINGFACE-TOKEN --query value --output tsv)
          Write-Output "::add-mask::$HF_TOKEN"
          Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=$HF_TOKEN"

      - name: Run the Python Tests
        run: |
          python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"

      # Diagnostic listing of the build tree; runs even after earlier
      # failures and never fails the job itself.
      - name: Verify Build Artifacts
        if: always()
        continue-on-error: true
        run: |
          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse

      # Puts the CUDA bin directory first on PATH for this step only, then
      # runs the native unit test binary.
      - name: Prepend CUDA to PATH and Run tests
        run: |
          $env:PATH = "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin;" + $env:PATH
          echo "Current PATH variable is: $env:PATH"
          .\build\cuda\test\Release\unit_tests.exe