# .github/workflows/test_fx_automatic_parallel.yml

name: Automatic Model Parallelism Test on GPUs

# Trigger on pull requests targeting `main` and on pushes to `main`, but only
# when something relevant to this suite changed: the parallelization sources,
# the tests this workflow runs (tests/fx/parallelization), or this workflow
# file itself.
on:
  pull_request:
    branches:
      - main
    paths:
      - 'optimum/fx/parallelization/**.py'
      - 'tests/fx/parallelization/**.py'
      - '.github/workflows/test_fx_automatic_parallel.yml'
  push:
    branches:
      - main
    paths:
      - 'optimum/fx/parallelization/**.py'
      - 'tests/fx/parallelization/**.py'
      - '.github/workflows/test_fx_automatic_parallel.yml'

# One concurrency group per workflow + PR head branch. `github.head_ref` is
# only populated for pull_request events, so a new push to a PR cancels that
# PR's in-flight run; for push events the expression falls back to the unique
# `github.run_id`, so those runs each get their own group.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.head_ref || github.run_id }}"

jobs:
  # Runs the tests under tests/fx/parallelization inside a CUDA container on
  # the runner group selected by `matrix.gpu_target`.
  run_gpu_tests:
    # Least privilege: the job only needs to read the repository for checkout.
    permissions:
      contents: read

    strategy:
      fail-fast: false
      matrix:
        config:
          - name: GPU-enabled Optimum Test Suite
            image: nvidia/cuda:12.4.1-devel-ubuntu22.04
        gpu_target: ["nvidia-multi-gpu-a10-runners"]

    name: ${{ matrix.config.name }}
    runs-on:
      group: "${{matrix.gpu_target}}"

    container:
      image: ${{ matrix.config.image }}
      # tmpfs-backed /tmp, all GPUs exposed, host IPC namespace, and the host
      # directory /mnt/hf_cache bind-mounted at /mnt/cache/.
      # NOTE(review): with `--ipc host` the container shares the host's
      # /dev/shm, so `--shm-size 64gb` likely has no effect — confirm.
      # NOTE(review): nothing visible in this job points HF_HOME (or a similar
      # setting) at /mnt/cache/ — confirm the mounted cache is actually used.
      options: --mount type=tmpfs,destination=/tmp --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
      env:
        # Verbose NCCL logging for the job's steps.
        NCCL_DEBUG: INFO
        # Hub token taken from the HF_HUB_READ_TOKEN repository secret.
        HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
    defaults:
      run:
        shell: bash

    steps:
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'

      - name: Checkout optimum
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      # Prints the GPUs visible inside the container; fails early if none are.
      - name: Run nvidia-smi
        run: |
          nvidia-smi

      - name: Install dependencies
        run: |
          python3 -m pip install -U pip
          python3 -m pip install torch transformers
          # Quoted so bash cannot treat `[tests]` as a glob character class.
          python3 -m pip install '.[tests]'

      - name: Run automatic model parallelism tests
        run: |
          pytest -s -v -o log_cli=true tests/fx/parallelization