# Self-hosted runner with slow tests (scheduled) - run #1126
# Workflow file for this run:

name: Self-hosted runner with slow tests (scheduled)

# Triggers: manual dispatch, plus a daily cron at minute 0 of hour 2
# (GitHub evaluates `schedule` cron expressions in UTC).
on:
  workflow_dispatch:
  schedule:
    - cron: "0 2 * * *"

# Workflow-level environment variables, visible to every job and step.
env:
  RUN_SLOW: "yes"
  IS_GITHUB_CI: "1"
  # To be able to run tests on CUDA 12.2
  NVIDIA_DISABLE_REQUIRE: "1"
  # NOTE(review): presumably read by the Slack reporting script - confirm.
  SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

# Empty mapping: the GITHUB_TOKEN is granted no permissions by default.
permissions: {}
jobs:
  # Runs the test suites with only GPU 0 visible (CUDA_VISIBLE_DEVICES="0").
  run_all_tests_single_gpu:
    strategy:
      fail-fast: false
    runs-on:
      group: aws-g6-4xlarge-plus
    env:
      CUDA_VISIBLE_DEVICES: "0"
      TEST_TYPE: "single_gpu"
    container:
      image: huggingface/peft-gpu:latest
      options: --gpus all --shm-size "16gb" -e NVIDIA_DISABLE_REQUIRE=true
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          persist-credentials: false
      # Every `run` step starts a fresh shell, so the `peft` environment is
      # re-activated at the top of each script.
      - name: Pip install
        run: |
          source activate peft
          pip install -e . --no-deps
          pip install pytest-reportlog
      # Each suite uses `continue-on-error: true` so a failing suite does not
      # stop the remaining suites; failures are surfaced by the final gate step.
      - name: Run common tests on single GPU
        id: common_tests
        continue-on-error: true
        run: |
          source activate peft
          make tests_common_gpu
      - name: Run examples on single GPU
        id: examples
        continue-on-error: true
        run: |
          source activate peft
          make tests_examples_single_gpu
      - name: Run core tests on single GPU
        id: core_tests
        continue-on-error: true
        run: |
          source activate peft
          make tests_core_single_gpu
      - name: Run regression tests on single GPU
        id: regression
        continue-on-error: true
        run: |
          source activate peft
          make tests_regression
      # Runs regardless of earlier results and appends the script's stdout to
      # the job summary.
      - name: Generate Report
        if: always()
        run: |
          pip install slack_sdk tabulate
          python scripts/log_reports.py >> "$GITHUB_STEP_SUMMARY"
      # Gate: `outcome` is the step result before `continue-on-error` is
      # applied, so this fails the job if any suite above actually failed.
      - name: Check for test failures
        if: |
          steps.common_tests.outcome == 'failure' ||
          steps.examples.outcome == 'failure' ||
          steps.core_tests.outcome == 'failure' ||
          steps.regression.outcome == 'failure'
        run: |
          echo "One or more test suites failed. Check the logs above."
          exit 1

  # Runs the test suites with GPUs 0 and 1 visible (CUDA_VISIBLE_DEVICES="0,1").
  run_all_tests_multi_gpu:
    strategy:
      fail-fast: false
    runs-on:
      group: aws-g6-12xlarge-plus
    env:
      CUDA_VISIBLE_DEVICES: "0,1"
      TEST_TYPE: "multi_gpu"
    container:
      image: huggingface/peft-gpu:latest
      options: --gpus all --shm-size "16gb" -e NVIDIA_DISABLE_REQUIRE=true
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          persist-credentials: false
      # Every `run` step starts a fresh shell, so the `peft` environment is
      # re-activated at the top of each script.
      - name: Pip install
        run: |
          source activate peft
          pip install -e . --no-deps
          pip install pytest-reportlog
      # Each suite uses `continue-on-error: true` so a failing suite does not
      # stop the remaining suites; failures are surfaced by the final gate step.
      - name: Run common tests on multi GPU
        id: common_tests
        continue-on-error: true
        run: |
          source activate peft
          make tests_common_gpu
      - name: Run examples on multi GPU
        id: examples
        continue-on-error: true
        run: |
          source activate peft
          make tests_examples_multi_gpu
      - name: Run core tests on multi GPU
        id: core_tests
        continue-on-error: true
        run: |
          source activate peft
          make tests_core_multi_gpu
      - name: Run training on multi GPU
        id: training
        continue-on-error: true
        run: |
          source activate peft
          make tests_training
      # Runs regardless of earlier results and appends the script's stdout to
      # the job summary.
      - name: Generate Report
        if: always()
        run: |
          pip install slack_sdk tabulate
          python scripts/log_reports.py >> "$GITHUB_STEP_SUMMARY"
      # Gate: `outcome` is the step result before `continue-on-error` is
      # applied, so this fails the job if any suite above actually failed.
      - name: Check for test failures
        if: |
          steps.common_tests.outcome == 'failure' ||
          steps.examples.outcome == 'failure' ||
          steps.core_tests.outcome == 'failure' ||
          steps.training.outcome == 'failure'
        run: |
          echo "One or more test suites failed. Check the logs above."
          exit 1