# Linux CPU x64 Nightly Benchmark #14

name: "Linux CPU x64 Nightly Benchmark"

# Run at 5:00 AM UTC every day
# 9:00 PM PST
on:
  workflow_dispatch:
  schedule:
    - cron: "0 5 * * *"

# github.head_ref is only populated for pull-request events; with the triggers
# above the group falls back to the run id.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# ONNX Runtime release consumed by the build.
# ort_url points at the GPU package, so ort_dir / ort_zip carry the matching
# "-gpu-" name: ort_dir must equal the directory the archive unpacks to
# (release archives are expected to unpack to a directory named after the
# archive), otherwise the "Rename OnnxRuntime to ort" step fails.
env:
  ort_dir: "onnxruntime-linux-x64-gpu-1.18.0"
  ort_zip: "onnxruntime-linux-x64-gpu-1.18.0.tgz"
  ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.18.0/onnxruntime-linux-x64-gpu-1.18.0.tgz"

jobs:
  job:
    runs-on: ["self-hosted", "1ES.Pool=onnxruntime-genai-Ubuntu2004-T4"]
    steps:
      - name: Checkout OnnxRuntime GenAI repo
        uses: actions/checkout@v4

      # --fail makes curl exit non-zero on an HTTP error instead of saving the
      # error page as the archive.
      - name: Download OnnxRuntime
        run: |
          curl --fail -L -o ${{ env.ort_zip }} ${{ env.ort_url }}

      - name: Unzip OnnxRuntime
        run: |
          tar -xzf ${{ env.ort_zip }}
          rm ${{ env.ort_zip }}

      # Move the unpacked release to "ort" in the workspace root.
      - name: Rename OnnxRuntime to ort
        run: |
          mv ${{ env.ort_dir }} ort

      - name: Git Submodule Update
        run: |
          git submodule update --init --recursive

      # Clean build using the linux_gcc_cuda_release CMake preset.
      - name: Build with CMake and GCC (CUDA)
        run: |
          set -e -x
          rm -rf build
          cmake --preset linux_gcc_cuda_release
          cmake --build --preset linux_gcc_cuda_release

      - name: Install the python wheel and test dependencies
        run: |
          python3 -m pip install build/cuda/wheel/onnxruntime_genai*.whl
          python3 -m pip install -r test/python/requirements-cuda.txt --user

      # Read the HuggingFace token from Key Vault, mask it in the logs, and
      # export it to later steps via GITHUB_ENV.
      # --output tsv yields the bare secret; the default JSON output would wrap
      # the value in double quotes, which would then become part of HF_TOKEN.
      - name: Get HuggingFace Token
        run: |
          az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
          HF_TOKEN=$(az keyvault secret show --vault-name anubissvcsecret --name ANUBIS-HUGGINGFACE-TOKEN --query value --output tsv)
          echo "::add-mask::$HF_TOKEN"
          echo "HF_TOKEN=$HF_TOKEN" >> "$GITHUB_ENV"

      # Build int4 phi-2 models for both execution providers, sharing one
      # download cache directory (hf_cache).
      - name: Build phi-2 int-4 model for cpu and gpu
        run: |
          python3 -m onnxruntime_genai.models.builder -m microsoft/phi-2 -o genai_models/phi2-int4-cpu -p int4 -e cpu -c hf_cache
          python3 -m onnxruntime_genai.models.builder -m microsoft/phi-2 -o genai_models/phi2-int4-cuda -p int4 -e cuda -c hf_cache

      # Results are written as CSV files under benchmark/python/.
      - name: Run the python benchmark for cpu and gpu
        run: |
          python3 benchmark/python/benchmark_e2e.py --input_folder genai_models/phi2-int4-cpu --output benchmark/python/phi-2-cpu.csv -b 1,2 -l 16,64,256 -g 128,256 -r 100 -w 5 -k 1
          python3 benchmark/python/benchmark_e2e.py --input_folder genai_models/phi2-int4-cuda --output benchmark/python/phi-2-cuda.csv -b 1,16 -l 64,256,1024 -g 128,512 -r 100 -w 5 -k 1