Add support for Write Dist #3615
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds TorchRec and runs its unit tests on GPU runners across a matrix of
# Python versions and CUDA builds of torch / fbgemm-gpu.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Unit Test CI

on:
  push:
    branches:
      # only run tests on main branch & nightly; release should be triggered manually
      - nightly
      - main
    # In GitHub path filters "*" does not match "/", so "**" is used to cover
    # nested files (e.g. docs/source/index.rst, torchrec/README.md).
    paths-ignore:
      - "docs/**"
      - "third_party/**"
      - .gitignore
      - "**.md"
  pull_request:
    # Same ignore list as for pushes; keep the two lists in sync.
    paths-ignore:
      - "docs/**"
      - "third_party/**"
      - .gitignore
      - "**.md"
  # Manual runs must choose which wheel channel torch and fbgemm-gpu are
  # installed from; push / pull_request runs have no channel input.
  workflow_dispatch:
    inputs:
      channel:
        description: "Channel to use for torch and fbgemm"
        required: true
        type: choice
        options:
          - release
          - nightly
          - test
jobs:
  # Installs torch and fbgemm-gpu for the selected channel / CUDA tag, builds
  # the TorchRec wheel, smoke-tests the imports and runs the pytest suite.
  build_test:
    strategy:
      fail-fast: false
      matrix:
        cuda-tag: ["cu126", "cu128"]
        os:
          - linux.g5.12xlarge.nvidia.gpu
        python:
          - version: "3.10"
            tag: "py310"
          - version: "3.11"
            tag: "py311"
          - version: "3.12"
            tag: "py312"
          - version: "3.13"
            tag: "py313"
        is_pr:
          - ${{ github.event_name == 'pull_request' }}
        # On pull requests the exclusions below leave a single combination:
        # cu128 with Python 3.13. Other events run the full matrix.
        exclude:
          - is_pr: true
            cuda-tag: "cu126"
          - is_pr: true
            cuda-tag: "cu128"
            python:
              version: "3.10"
          - is_pr: true
            cuda-tag: "cu128"
            python:
              version: "3.11"
          - is_pr: true
            cuda-tag: "cu128"
            python:
              version: "3.12"
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      runner: ${{ matrix.os }}
      timeout: 60
      script: |
        ldd --version
        conda create -y --name build_binary python=${{ matrix.python.version }}
        conda info
        python --version
        conda run -n build_binary python --version

        # Select the wheel index: "release" uses the stable index, an empty
        # channel (no workflow_dispatch input) falls back to nightly, and any
        # other channel name is used as a path component of the index URL.
        if [[ "${{ inputs.channel }}" = "release" ]]; then
          index_url=https://download.pytorch.org/whl/${{ matrix.cuda-tag }}
        elif [[ -z "${{ inputs.channel }}" ]]; then
          index_url=https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
        else
          index_url=https://download.pytorch.org/whl/${{ inputs.channel }}/${{ matrix.cuda-tag }}
        fi
        echo "index_url: $index_url"

        # Install torch and verify it imports.
        conda run -n build_binary \
          pip install torch --index-url "$index_url"
        conda run -n build_binary \
          python -c "import torch; print(torch.__version__)"
        echo "torch succeeded"
        conda run -n build_binary \
          python -c "import torch.distributed"
        echo "torch.distributed succeeded"

        # Install fbgemm-gpu from the same index and verify it imports.
        conda run -n build_binary \
          pip install fbgemm-gpu --index-url "$index_url"
        conda run -n build_binary \
          python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)"
        echo "fbgemm_gpu succeeded"

        # Install the remaining requirements, build the wheel and check that
        # torchrec and numpy import.
        conda run -n build_binary \
          pip install -r requirements.txt
        conda run -n build_binary \
          python setup.py bdist_wheel \
          --python-tag=${{ matrix.python.tag }}
        conda run -n build_binary \
          python -c "import torchrec"
        echo "torchrec succeeded"
        conda run -n build_binary \
          python -c "import numpy"
        echo "numpy succeeded"

        conda install -n build_binary -y pytest

        # Read the list of tests to skip from a file, ignoring empty lines and
        # comments, and join them as "not A and not B ..." for pytest -k.
        skip_expression=$(awk '!/^($|#)/ {printf " and not %s", $0}' ./.github/scripts/tests_to_skip.txt)
        # Drop the leading " and " (no-op when the list is empty).
        skip_expression=${skip_expression# and }

        conda run -n build_binary \
          python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors \
          --ignore=torchrec/distributed/tests/test_comm.py --ignore=torchrec/distributed/tests/test_infer_shardings.py \
          --ignore=torchrec/distributed/tests/test_keyed_jagged_tensor_pool.py --ignore=torchrec/distributed/tests/test_pt2_multiprocess.py \
          --ignore=torchrec/distributed/tests/test_pt2.py --ignore=torchrec/distributed/tests/test_quant_model_parallel.py \
          --ignore=torchrec/distributed/tests/test_quant_pruning.py --ignore=torchrec/distributed/tests/test_quant_sequence_model_parallel.py \
          --ignore-glob='torchrec/metrics/*' --ignore-glob='torchrec/distributed/tests/test_model_parallel_gloo*' \
          --ignore-glob='torchrec/inference/inference_legacy/tests*' --ignore-glob='*test_model_parallel_nccl*' \
          --ignore=torchrec/distributed/tests/test_cache_prefetch.py --ignore=torchrec/distributed/tests/test_fp_embeddingbag_single_rank.py \
          --ignore=torchrec/distributed/tests/test_infer_utils.py --ignore=torchrec/distributed/tests/test_fx_jit.py --ignore-glob='**/test_utils/' \
          --ignore-glob='*test_train_pipeline*' --ignore=torchrec/distributed/tests/test_model_parallel_hierarchical.py \
          -k "$skip_expression"