Add GroupQueryAttention on CPU in model builder (#420) #1

Workflow file for this run

.github/workflows/win-cuda-x64-build.yml at e2aa89e

	# Builds and tests the project with CUDA on a Windows x64 runner.
	name: "Windows CUDA x64 Build"
	on:
	  # Allows the workflow to be started manually.
	  workflow_dispatch:
	  # Pushes to main and to any release branch matching rel-*.
	  push:
	    branches:
	      - main
	      - 'rel-*'
	  # Pull requests; no branch or path filters are configured.
	  pull_request:

	# Runs that share a group cancel the older in-progress run. The group key is
	# the workflow name plus the PR head ref, falling back to the run id when
	# github.head_ref is empty.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
	  cancel-in-progress: true

	# Workflow-level environment shared by every step below.
	env:
	  # Login type and client id consumed by azcopy (see the "Download cuda" step).
	  AZCOPY_AUTO_LOGIN_TYPE: MSI
	  AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
	  # Directory the CUDA SDK is copied into, and the SDK version to fetch.
	  cuda_dir: "${{ github.workspace }}\\cuda_sdk"
	  cuda_version: "11.8"
	  # NOTE(review): this plain scalar keeps both backslashes literally, unlike
	  # the double-quoted cuda_dir above where "\\" is one backslash. The version
	  # is also repeated here instead of reusing cuda_version - keep them in sync.
	  CUDA_PATH: ${{ github.workspace }}\\cuda_sdk\\v11.8
	  # Build output directory, relative to the workspace.
	  binaryDir: 'build/cuda'
	  # Feed query used to look up the latest nightly version of the package below.
	  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime.Gpu.Windows&api-version=6.0-preview.1"
	  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime.Gpu.Windows"

	jobs:
	  # Single job: fetch CUDA and the ONNX Runtime nightly, build with CMake, then
	  # run the C#, Python and native unit tests.
	  windows-cuda-x64-build:
	    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Win2022-GPU-A10"]
	    steps:
	      - name: Checkout OnnxRuntime GenAI repo
	        uses: actions/checkout@v4
	        with:
	          submodules: true

	      - uses: actions/setup-python@v5
	        with:
	          python-version: '3.11.x'
	          architecture: 'x64'

	      # Copies the CUDA SDK for env.cuda_version from blob storage into env.cuda_dir.
	      - name: Download cuda
	        run: |
	          azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ env.cuda_version }}" ${{ env.cuda_dir}}

	      - uses: actions/setup-dotnet@v4
	        with:
	          dotnet-version: '6.0.x'

	      - name: Install jq and curl
	        run: |
	          choco install -y jq curl

	      # Reads the newest version from the feed and exports it through GITHUB_ENV
	      # so that later steps can use env.ORT_NIGHTLY_VERSION.
	      - name: Get the Latest OnnxRuntime Nightly Version
	        shell: pwsh
	        run: |
	          $ORT_NIGHTLY_VERSION=$(curl -s "${{ env.ORT_NIGHTLY_REST_API }}" | jq -r '.value[0].versions[0].normalizedVersion')
	          echo "$ORT_NIGHTLY_VERSION"
	          "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append

	      - name: Download OnnxRuntime Nightly
	        run: |
	          nuget install ${{ env.ORT_PACKAGE_NAME }} -version ${{ env.ORT_NIGHTLY_VERSION }} -ExcludeVersion -NonInteractive

	      # Diagnostic listing of the downloaded package; a failure here is ignored.
	      - run: Get-ChildItem ${{ env.ORT_PACKAGE_NAME }} -Recurse
	        continue-on-error: true

	      # Moves the package's headers and win-x64 native binaries into ort/.
	      - name: Extract OnnxRuntime library and header files
	        run: |
	          mkdir ort/lib
	          move ${{ env.ORT_PACKAGE_NAME }}/buildTransitive/native/include ort/
	          move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-x64/native/* ort/lib/

	      - name: Configure CMake
	        run: |
	          cmake --preset windows_x64_cuda_release -T cuda=${{ env.cuda_dir }}\\v${{ env.cuda_version }} -DTEST_PHI2=False

	      - name: Build with CMake
	        run: |
	          cmake --build --preset windows_x64_cuda_release --parallel

	      # Appends the CUDA bin directory to GITHUB_PATH for the steps that follow.
	      - name: Add CUDA to PATH
	        run: |
	          echo "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

	      - name: Build the C# API and Run the C# Tests
	        run: |
	          cd test\csharp
	          dotnet test /p:Configuration=release /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release"

	      # NOTE(review): this CUDA job installs the *-cpu requirements file -
	      # confirm that is intended.
	      - name: Install the Python Wheel and Test Dependencies
	        run: |
	          python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl"))
	          python -m pip install -r test\python\requirements-nightly-cpu.txt

	      # Reads the token from Key Vault, masks it in the logs and exports it as
	      # HF_TOKEN. "--output tsv" returns the raw value; the default JSON output
	      # would wrap it in double quotes that end up inside HF_TOKEN and the mask.
	      - name: Get HuggingFace Token
	        run: |
	          az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
	          $HF_TOKEN = (az keyvault secret show --vault-name anubissvcsecret --name ANUBIS-HUGGINGFACE-TOKEN --query value --output tsv)
	          Write-Output "::add-mask::$HF_TOKEN"
	          Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=$HF_TOKEN"

	      - name: Run the Python Tests
	        run: |
	          python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"

	      # Always lists the build tree, even after an earlier failure, and never
	      # fails the job itself.
	      - name: Verify Build Artifacts
	        if: always()
	        continue-on-error: true
	        run: |
	          Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse

	      # Puts the CUDA bin directory first on PATH for this step only, then runs
	      # the native unit test binary.
	      - name: Prepend CUDA to PATH and Run tests
	        run: |
	          $env:PATH = "${{ env.cuda_dir }}\\v${{ env.cuda_version }}\\bin;" + $env:PATH
	          echo "Current PATH variable is: $env:PATH"
	          .\build\cuda\test\Release\unit_tests.exe

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add GroupQueryAttention on CPU in model builder (#420) #1

Workflow file

Add GroupQueryAttention on CPU in model builder (#420) #1

Jobs

Run details

Workflow file for this run