# Workflow file for this run: "Add support for Write Dist #3615"

# This workflow installs Python dependencies, builds the torchrec wheel, and runs the unit tests with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Unit Test CI

# Triggers: pushes to the long-lived branches, every pull request, and manual
# dispatch with an explicit wheel channel.
on:
  push:
    branches:
      # only run tests on main branch & nightly; release should be triggered manually
      - nightly
      - main
    # NOTE(review): in GitHub path filters `*` does not match `/`, so these
    # patterns only cover files directly inside docs/ and third_party/, and
    # "*.md" only matches at the repository root - confirm whether `**` was
    # intended.
    paths-ignore:
      - "docs/*"
      - "third_party/*"
      - .gitignore
      - "*.md"
  pull_request:
    # Same ignore list as the push trigger (see the NOTE there about `*`).
    paths-ignore:
      - "docs/*"
      - "third_party/*"
      - .gitignore
      - "*.md"
  workflow_dispatch:
    inputs:
      # Selects which download.pytorch.org wheel index the job installs
      # torch and fbgemm-gpu from.
      channel:
        description: "Channel to use for torch and fbgemm"
        required: true
        type: choice
        options:
          - release
          - nightly
          - test
jobs:
  # Installs torch and fbgemm-gpu from the selected channel, builds the
  # torchrec wheel, and runs the torchrec pytest suite on a GPU runner.
  build_test:
    strategy:
      fail-fast: false
      matrix:
        cuda-tag: ["cu126", "cu128"]
        os:
          - linux.g5.12xlarge.nvidia.gpu
        python:
          - version: "3.10"
            tag: "py310"
          - version: "3.11"
            tag: "py311"
          - version: "3.12"
            tag: "py312"
          - version: "3.13"
            tag: "py313"
        # Single-valued axis: true only when the run was triggered by a pull
        # request, so the exclude rules below can key off the event type.
        is_pr:
          - ${{ github.event_name == 'pull_request' }}
        # Pull requests are meant to run a reduced matrix: every cu126
        # combination and the cu128 combinations for Python 3.10-3.12 are
        # excluded, leaving only cu128 with Python 3.13.
        exclude:
          - is_pr: true
            cuda-tag: "cu126"
          - is_pr: true
            cuda-tag: "cu128"
            python:
              version: "3.10"
          - is_pr: true
            cuda-tag: "cu128"
            python:
              version: "3.11"
          - is_pr: true
            cuda-tag: "cu128"
            python:
              version: "3.12"
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      runner: ${{ matrix.os }}
      timeout: 60
      script: |
        ldd --version
        conda create -y --name build_binary python=${{ matrix.python.version }}
        conda info
        python --version
        conda run -n build_binary python --version

        # Pick the wheel index: "release" uses the stable index, an empty
        # channel (no workflow_dispatch input supplied) falls back to nightly,
        # and any other value is used as the index sub-directory.
        if [[ "${{ inputs.channel }}" = "release" ]]; then
          index_url=https://download.pytorch.org/whl/${{ matrix.cuda-tag }}
        elif [[ -z "${{ inputs.channel }}" ]]; then
          index_url=https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
        else
          index_url=https://download.pytorch.org/whl/${{ inputs.channel }}/${{ matrix.cuda-tag }}
        fi
        echo "index_url: $index_url"

        # Install torch and check that it (and torch.distributed) imports.
        conda run -n build_binary \
          pip install torch --index-url "$index_url"
        conda run -n build_binary \
          python -c "import torch; print(torch.__version__)"
        echo "torch succeeded"
        conda run -n build_binary \
          python -c "import torch.distributed"
        echo "torch.distributed succeeded"

        # Install fbgemm-gpu from the same index and check that it imports.
        conda run -n build_binary \
          pip install fbgemm-gpu --index-url "$index_url"
        conda run -n build_binary \
          python -c "import fbgemm_gpu; print(fbgemm_gpu.__version__)"
        echo "fbgemm_gpu succeeded"

        # Install the remaining requirements, build the wheel, and check that
        # torchrec and numpy import.
        conda run -n build_binary \
          pip install -r requirements.txt
        conda run -n build_binary \
          python setup.py bdist_wheel \
          --python-tag=${{ matrix.python.tag }}
        conda run -n build_binary \
          python -c "import torchrec"
        echo "torchrec succeeded"
        conda run -n build_binary \
          python -c "import numpy"
        echo "numpy succeeded"

        conda install -n build_binary -y pytest
        # Read the list of tests to skip from a file, ignoring empty lines and comments
        skip_expression=$(awk '!/^($|#)/ {printf " and not %s", $0}' ./.github/scripts/tests_to_skip.txt)
        # Drop the leading " and " joiner; this is a no-op when the list is empty.
        skip_expression="${skip_expression# and }"

        # Run the torchrec tests, leaving out the ignored files/globs and
        # deselecting the names collected in skip_expression.
        conda run -n build_binary \
          python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors \
          --ignore=torchrec/distributed/tests/test_comm.py \
          --ignore=torchrec/distributed/tests/test_infer_shardings.py \
          --ignore=torchrec/distributed/tests/test_keyed_jagged_tensor_pool.py \
          --ignore=torchrec/distributed/tests/test_pt2_multiprocess.py \
          --ignore=torchrec/distributed/tests/test_pt2.py \
          --ignore=torchrec/distributed/tests/test_quant_model_parallel.py \
          --ignore=torchrec/distributed/tests/test_quant_pruning.py \
          --ignore=torchrec/distributed/tests/test_quant_sequence_model_parallel.py \
          --ignore-glob='torchrec/metrics/*' \
          --ignore-glob='torchrec/distributed/tests/test_model_parallel_gloo*' \
          --ignore-glob='torchrec/inference/inference_legacy/tests*' \
          --ignore-glob='*test_model_parallel_nccl*' \
          --ignore=torchrec/distributed/tests/test_cache_prefetch.py \
          --ignore=torchrec/distributed/tests/test_fp_embeddingbag_single_rank.py \
          --ignore=torchrec/distributed/tests/test_infer_utils.py \
          --ignore=torchrec/distributed/tests/test_fx_jit.py \
          --ignore-glob='**/test_utils/' \
          --ignore-glob='*test_train_pipeline*' \
          --ignore=torchrec/distributed/tests/test_model_parallel_hierarchical.py \
          -k "$skip_expression"