From bce2abba219db6bb0ebca5cd94d7af190758c8ed Mon Sep 17 00:00:00 2001 From: HDCharles <39544797+HDCharles@users.noreply.github.com> Date: Mon, 18 Nov 2024 16:48:11 -0500 Subject: [PATCH] migrate to linux_job_v2 and manylinux 2_28 (#1302) * Update float8_test.yml to use linux_job_v2 * Update nightly_smoke_test.yml * Update float8_test.yml no binutils * Update post_build_script.sh * Update post_build_script.sh * Update regression_test.yml * Update regression_test.yml --- .github/workflows/float8_test.yml | 4 +-- .github/workflows/nightly_smoke_test.yml | 2 +- .github/workflows/regression_test.yml | 43 ++++++++++++++++++------ packaging/post_build_script.sh | 2 +- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/.github/workflows/float8_test.yml b/.github/workflows/float8_test.yml index f90282011e..760beb6319 100644 --- a/.github/workflows/float8_test.yml +++ b/.github/workflows/float8_test.yml @@ -29,7 +29,7 @@ jobs: gpu-arch-type: "cuda" gpu-arch-version: "12.1" - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: timeout: 60 runner: ${{ matrix.runs-on }} @@ -38,8 +38,6 @@ jobs: script: | conda create -n venv python=3.9 -y conda activate venv - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH python -m pip install --upgrade pip pip install ${{ matrix.torch-spec }} diff --git a/.github/workflows/nightly_smoke_test.yml b/.github/workflows/nightly_smoke_test.yml index 9e2d4fee82..9f3dc3c0fb 100644 --- a/.github/workflows/nightly_smoke_test.yml +++ b/.github/workflows/nightly_smoke_test.yml @@ -26,7 +26,7 @@ jobs: gpu-arch-version: "12.1" - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: ${{ matrix.runs-on }} gpu-arch-type: ${{ matrix.gpu-arch-type }} diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index 975e0470f5..cf58e476c2 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -18,6 +18,38 @@ env: HF_TOKEN: ${{ secrets.HF_TOKEN }} jobs: + test-nightly: + strategy: + fail-fast: false + matrix: + include: + - name: CUDA Nightly + runs-on: linux.g5.12xlarge.nvidia.gpu + torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cu121' + gpu-arch-type: "cuda" + gpu-arch-version: "12.1" + - name: CPU Nightly + runs-on: linux.4xlarge + torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cpu' + gpu-arch-type: "cpu" + gpu-arch-version: "" + + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + timeout: 120 + runner: ${{ matrix.runs-on }} + gpu-arch-type: ${{ matrix.gpu-arch-type }} + gpu-arch-version: ${{ matrix.gpu-arch-version }} + script: | + conda create -n venv python=3.9 -y + conda activate venv + python -m pip install --upgrade pip + pip install ${{ matrix.torch-spec }} + pip install -r dev-requirements.txt + pip install . + export CONDA=$(dirname $(dirname $(which conda))) + export LD_LIBRARY_PATH=$CONDA/lib/:$LD_LIBRARY_PATH + pytest test --verbose -s test: strategy: fail-fast: false @@ -38,12 +70,6 @@ jobs: torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121' gpu-arch-type: "cuda" gpu-arch-version: "12.1" - - name: CUDA Nightly - runs-on: linux.g5.12xlarge.nvidia.gpu - torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cu121' - gpu-arch-type: "cuda" - gpu-arch-version: "12.1" - - name: CPU 2.3 runs-on: linux.4xlarge torch-spec: 'torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu' @@ -59,11 +85,6 @@ jobs: torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu' gpu-arch-type: "cpu" gpu-arch-version: "" - - name: CPU Nightly - runs-on: linux.4xlarge - torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cpu' - gpu-arch-type: "cpu" - gpu-arch-version: "" uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: diff --git a/packaging/post_build_script.sh b/packaging/post_build_script.sh index 70e8d83392..e6cfc8adfe 100644 --- a/packaging/post_build_script.sh +++ b/packaging/post_build_script.sh @@ -13,7 +13,7 @@ if [[ "$CU_VERSION" == cu* ]]; then WHEEL_NAME=$(ls dist/) pushd dist - manylinux_plat=manylinux2014_x86_64 + manylinux_plat=manylinux_2_28_x86_64 auditwheel repair --plat "$manylinux_plat" -w . \ --exclude libtorch.so \ --exclude libtorch_python.so \