Linux CPU x64 Nightly Benchmark #18
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity, triggers and concurrency policy for the nightly benchmark.
name: "Linux CPU x64 Nightly Benchmark"

# Triggers: manual dispatch, plus a daily schedule at 5:00 AM UTC
# (9:00 PM PST the previous evening).
on:
  workflow_dispatch:
  schedule:
    - cron: "0 5 * * *"

# NOTE(review): github.head_ref is documented as populated only for
# pull_request events. With the two triggers above the group presumably
# resolves to "<workflow>-<run_id>", which is unique per run, so
# cancel-in-progress would never cancel anything - confirm this is intended.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Prebuilt ONNX Runtime release used by the build.
#   ort_url - release asset to download
#   ort_zip - local file name the download is saved under
#   ort_dir - directory the archive unpacks to (renamed to "ort" afterwards)
# ort_dir and ort_zip must name the same package flavour as ort_url: the URL
# points at the "-gpu-" archive, so the other two use the "-gpu-" name as well.
# NOTE(review): assumes the GPU archive unpacks to a directory named after the
# archive - confirm against the v1.18.0 release asset.
env:
  ort_dir: "onnxruntime-linux-x64-gpu-1.18.0"
  ort_zip: "onnxruntime-linux-x64-gpu-1.18.0.tgz"
  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.18.0/onnxruntime-linux-x64-gpu-1.18.0.tgz"
# Single job: fetch ONNX Runtime, build the CUDA preset, install the wheel,
# export phi-2 as int4 for cpu and cuda, then run the end-to-end benchmark
# against both exported models.
jobs:
  job:
    # Self-hosted pool; presumably T4 GPU machines given the pool name - verify.
    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-T4"]
    steps:
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v4

      - name: Download OnnxRuntime
        # --fail aborts the step on an HTTP error instead of saving the
        # error page under the archive name.
        run: |
          curl --fail --location --output "${{ env.ort_zip }}" "${{ env.ort_url }}"

      - name: Unzip OnnxRuntime
        run: |
          tar -xzf "${{ env.ort_zip }}"
          rm "${{ env.ort_zip }}"

      - name: Rename OnnxRuntime to ort
        run: |
          mv "${{ env.ort_dir }}" ort

      - name: Git Submodule Update
        run: |
          git submodule update --init --recursive

      - name: Build with CMake and GCC (CUDA preset)
        run: |
          set -e -x
          rm -rf build
          cmake --preset linux_gcc_cuda_release
          cmake --build --preset linux_gcc_cuda_release

      - name: Install the python wheel and test dependencies
        run: |
          python3 -m pip install build/cuda/wheel/onnxruntime_genai*.whl
          python3 -m pip install -r test/python/requirements-cuda.txt --user

      - name: Get HuggingFace Token
        # Reads the token from Key Vault, masks it in the logs, and exports it
        # to later steps through GITHUB_ENV.
        # --output tsv yields the raw secret; the default JSON output would
        # wrap it in double quotes that end up inside HF_TOKEN.
        run: |
          az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
          HF_TOKEN=$(az keyvault secret show --vault-name anubissvcsecret --name ANUBIS-HUGGINGFACE-TOKEN --query value --output tsv)
          echo "::add-mask::$HF_TOKEN"
          echo "HF_TOKEN=$HF_TOKEN" >> "$GITHUB_ENV"

      - name: Build phi-2 int-4 model for cpu and gpu
        run: |
          python3 -m onnxruntime_genai.models.builder -m microsoft/phi-2 -o genai_models/phi2-int4-cpu -p int4 -e cpu -c hf_cache
          python3 -m onnxruntime_genai.models.builder -m microsoft/phi-2 -o genai_models/phi2-int4-cuda -p int4 -e cuda -c hf_cache

      - name: Run the python benchmark for cpu and gpu
        # One CSV per execution provider, written under benchmark/python/.
        run: |
          python3 benchmark/python/benchmark_e2e.py \
            --input_folder genai_models/phi2-int4-cpu \
            --output benchmark/python/phi-2-cpu.csv \
            -b 1,2 -l 16,64,256 -g 128,256 -r 100 -w 5 -k 1
          python3 benchmark/python/benchmark_e2e.py \
            --input_folder genai_models/phi2-int4-cuda \
            --output benchmark/python/phi-2-cuda.csv \
            -b 1,16 -l 64,256,1024 -g 128,512 -r 100 -w 5 -k 1