Fix MPMD detected error during training with TP #585
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that runs a partial set of export tests on an INF1 self-hosted runner.
name: Optimum neuron / Test INF1 partial export

# Trigger on pushes to main and on pull requests targeting main, but only when
# setup.py or a Python file under optimum/ is part of the change.
on:
  push:
    branches: [main]
    paths:
      - "setup.py"
      - "optimum/**.py"
  pull_request:
    branches: [main]
    paths:
      - "setup.py"
      - "optimum/**.py"

# One concurrency group per workflow and PR head ref; when there is no head ref
# (push events) the run id is used instead, so every push gets its own group.
# A newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Single job: provisions the Neuron packages and a Python virtualenv on the
  # self-hosted runner, then runs the CLI and exporter test suites.
  do-the-job:
    name: Run INF1 export tests
    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
    env:
      AWS_REGION: us-east-1
    steps:
      # Registers the Neuron apt repository for the runner's distribution
      # codename (read from /etc/os-release) and installs pinned package versions.
      - name: Install Neuron runtime
        run: |
          . /etc/os-release
          sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null <<EOF
          deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main
          EOF
          wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
          sudo apt-get update -y
          sudo apt-get install -y \
            aws-neuronx-tools=2.17.1.0 \
            aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \
            aws-neuronx-collectives=2.20.22.0-c101c322e
          # A plain `export PATH=...` would be lost when this step's shell exits;
          # appending to GITHUB_PATH makes the Neuron tools visible to later steps.
          echo "/opt/aws/neuron/bin" >> "$GITHUB_PATH"

      - name: Checkout
        uses: actions/checkout@v4

      - name: Install system packages
        run: |
          sudo apt-get install -y python3.8-venv python3-dev

      # Creates the virtualenv reused (via `source .../activate`) by the test steps.
      - name: Install python packages
        run: |
          python3 -m venv aws_neuron_venv_pytorch
          source aws_neuron_venv_pytorch/bin/activate
          python -m pip install -U pip
          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
          python -m pip install ".[neuron,tests]"
          # NOTE(review): optimum is removed and reinstalled after the project
          # install - presumably to pick up the latest published release; confirm.
          python -m pip uninstall optimum -y
          python -m pip install optimum

      - name: Run CLI tests
        env:
          # Passed through the environment so the secret value is never
          # interpolated into the shell script text.
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          pytest -m is_inferentia_test tests/cli

      - name: Run export tests
        env:
          # Passed through the environment so the secret value is never
          # interpolated into the shell script text.
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
          MAX_EXPORT_TEST_COMBINATIONS: "1"
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          pytest -m is_inferentia_test tests/exporters