Skip to content

Commit 3d94709

Browse files
Added gating changes to windows pytorch wheels
1 parent 666a4f2 commit 3d94709

File tree

4 files changed

+153
-9
lines changed

4 files changed

+153
-9
lines changed

.github/workflows/build_windows_pytorch_wheels.yml

Lines changed: 93 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,18 @@ on:
1717
description: S3 subdirectory, not including the GPU-family
1818
required: true
1919
type: string
20+
s3_staging_subdir:
21+
description: S3 staging subdirectory, not including the GPU-family
22+
required: true
23+
type: string
2024
cloudfront_url:
2125
description: CloudFront URL pointing to Python index
2226
required: true
2327
type: string
28+
cloudfront_staging_url:
29+
description: CloudFront base URL pointing to staging Python index
30+
required: true
31+
type: string
2432
rocm_version:
2533
description: ROCm version to pip install
2634
type: string
@@ -47,10 +55,18 @@ on:
4755
description: S3 subdirectory, not including the GPU-family
4856
type: string
4957
default: "v2"
58+
s3_staging_subdir:
59+
description: S3 staging subdirectory, not including the GPU-family
60+
type: string
61+
default: "v2-staging"
5062
cloudfront_url:
5163
description: CloudFront base URL pointing to Python index
5264
type: string
5365
default: "https://d25kgig7rdsyks.cloudfront.net/v2"
66+
cloudfront_staging_url:
67+
description: CloudFront base URL pointing to staging Python index
68+
type: string
69+
default: "https://d25kgig7rdsyks.cloudfront.net/v2-staging"
5470
rocm_version:
5571
description: ROCm version to pip install
5672
type: string
@@ -154,26 +170,24 @@ jobs:
154170
# run: |
155171
# python external-builds/pytorch/sanity_check_wheel.py ${{ env.PACKAGE_DIST_DIR }}
156172

157-
- name: Upload wheels to S3
173+
- name: Upload wheels to S3 staging
158174
if: ${{ github.repository_owner == 'ROCm' }}
159-
# Using 'cmd' here since PACKAGE_DIST_DIR uses \ in paths instead of /
160-
shell: cmd
161175
run: |
162-
aws s3 cp ${{ env.PACKAGE_DIST_DIR }}/ ^
163-
s3://${{ env.S3_BUCKET_PY }}/${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}/ ^
176+
aws s3 cp ${{ env.PACKAGE_DIST_DIR }}/ s3://${{ env.S3_BUCKET_PY }}/${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}/ \
164177
--recursive --exclude "*" --include "*.whl"
165178
166-
- name: (Re-)Generate Python package release index
179+
- name: (Re-)Generate Python package release index for staging
167180
if: ${{ github.repository_owner == 'ROCm' }}
168181
run: |
169182
pip install boto3 packaging
170-
python ./build_tools/third_party/s3_management/manage.py ${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}
183+
python ./build_tools/third_party/s3_management/manage.py ${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}
171184
172185
generate_target_to_run:
173186
name: Generate target_to_run
174187
runs-on: ubuntu-24.04
175188
outputs:
176189
test_runs_on: ${{ steps.configure.outputs.test-runs-on }}
190+
bypass_tests_for_releases: ${{ steps.configure.outputs.bypass_tests_for_releases }}
177191
steps:
178192
- name: Checking out repository
179193
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
@@ -196,3 +210,75 @@ jobs:
196210
cloudfront_url: ${{ inputs.cloudfront_url }}
197211
python_version: ${{ inputs.python_version }}
198212
torch_version: ${{ needs.build_pytorch_wheels.outputs.torch_version }}
213+
214+
upload_pytorch_wheels:
  # Promotion gate: copies the wheels that were uploaded to the staging
  # subdirectory earlier in this workflow into the release subdirectory, but
  # only when the build succeeded and the tests passed (or were explicitly
  # bypassed for a family that has no test runner).
  name: Release PyTorch Wheels to S3
  needs: [build_pytorch_wheels, generate_target_to_run, test_pytorch_wheels]
  # always(): the gate must still be evaluated when an upstream job failed or
  # was skipped, so the decision is made in the "Determine upload flag" step.
  # The owner check mirrors the other S3 steps in these workflows; forks cannot
  # assume the release role and would otherwise fail here.
  # NOTE(review): assuming the role via OIDC needs `id-token: write`; confirm
  # it is granted at workflow level, since this job does not declare it.
  if: ${{ always() && github.repository_owner == 'ROCm' }}
  runs-on: ubuntu-24.04
  env:
    S3_BUCKET_PY: "therock-${{ inputs.release_type }}-python"
    CP_VERSION: "${{ needs.build_pytorch_wheels.outputs.cp_version }}"
    TORCH_VERSION: "${{ needs.build_pytorch_wheels.outputs.torch_version }}"
    TORCHAUDIO_VERSION: "${{ needs.build_pytorch_wheels.outputs.torchaudio_version }}"
    TORCHVISION_VERSION: "${{ needs.build_pytorch_wheels.outputs.torchvision_version }}"
    TRITON_VERSION: "${{ needs.build_pytorch_wheels.outputs.triton_version }}"

  steps:
    - name: Checkout
      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

    - name: Configure AWS Credentials
      # Runs even if the checkout step above failed.
      if: always()
      uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a # v4.3.1
      with:
        aws-region: us-east-2
        role-to-assume: arn:aws:iam::692859939525:role/therock-${{ inputs.release_type }}-releases

    - name: Determine upload flag
      env:
        BUILD_RESULT: ${{ needs.build_pytorch_wheels.result }}
        TEST_RESULT: ${{ needs.test_pytorch_wheels.result }}
        TEST_RUNS_ON: ${{ needs.generate_target_to_run.outputs.test_runs_on }}
        BYPASS_TESTS_FOR_RELEASES: ${{ needs.generate_target_to_run.outputs.bypass_tests_for_releases }}
      run: |
        # Writes upload=true|false to $GITHUB_ENV; the later steps key off it.

        # 1) If the build did not succeed -> upload=false
        if [[ "$BUILD_RESULT" != "success" ]]; then
          echo "::warning::Build failed. Skipping upload."
          echo "upload=false" >> "$GITHUB_ENV"

        # 2) Else if there was a test runner AND the tests did not succeed
        #    (failure, skipped or cancelled) -> upload=false.
        #    Comparing against "success" also blocks a cancelled test job,
        #    which would otherwise fall through to upload=true.
        elif [[ -n "$TEST_RUNS_ON" && "$TEST_RESULT" != "success" ]]; then
          echo "::warning::Tests did not succeed (result: ${TEST_RESULT}, runner present). Skipping upload."
          echo "upload=false" >> "$GITHUB_ENV"

        # 3) Else if BYPASS_TESTS_FOR_RELEASES is not set and there was no test runner -> upload=false
        #    NOTE(review): any non-empty value (including "false") counts as
        #    "set" here; confirm what generate_target_to_run emits.
        elif [[ -z "$BYPASS_TESTS_FOR_RELEASES" && -z "$TEST_RUNS_ON" ]]; then
          echo "::warning::No test runner and BYPASS_TESTS_FOR_RELEASES not set. Skipping upload."
          echo "upload=false" >> "$GITHUB_ENV"

        # 4) Otherwise -> upload=true
        else
          echo "upload=true" >> "$GITHUB_ENV"
        fi

    - name: Copy PyTorch wheels from staging to release S3
      if: ${{ env.upload == 'true' }}
      run: |
        echo "Copying exact tested wheels to release S3 bucket..."
        # This workflow builds Windows wheels, whose filenames end in the
        # win_amd64 platform tag; a linux_x86_64 filter would match nothing
        # and silently promote no wheels.
        # NOTE(review): confirm the Windows build publishes a
        # pytorch_triton_rocm wheel and a triton_version output; if it does
        # not, that pattern matches nothing.
        aws s3 cp \
          s3://${S3_BUCKET_PY}/${{ inputs.s3_staging_subdir }}/${{ inputs.amdgpu_family }}/ \
          s3://${S3_BUCKET_PY}/${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}/ \
          --recursive \
          --exclude "*" \
          --include "torch-${TORCH_VERSION}-${CP_VERSION}-win_amd64.whl" \
          --include "torchaudio-${TORCHAUDIO_VERSION}-${CP_VERSION}-win_amd64.whl" \
          --include "torchvision-${TORCHVISION_VERSION}-${CP_VERSION}-win_amd64.whl" \
          --include "pytorch_triton_rocm-${TRITON_VERSION}-${CP_VERSION}-win_amd64.whl"

    - name: (Re-)Generate Python package release index
      if: ${{ env.upload == 'true' }}
      run: |
        pip install boto3 packaging
        python ./build_tools/third_party/s3_management/manage.py ${{ inputs.s3_subdir }}/${{ inputs.amdgpu_family }}

.github/workflows/release_windows_packages.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ on:
1414
description: "Subdirectory to push the Python packages"
1515
type: string
1616
default: "v2"
17+
s3_staging_subdir:
18+
description: "Staging subdirectory to push the packages"
19+
type: string
20+
default: "v2-staging"
1721
# Trigger manually (typically to test the workflow or manually build a release [candidate])
1822
workflow_dispatch:
1923
inputs:
@@ -27,6 +31,10 @@ on:
2731
description: "Subdirectory to push the Python packages"
2832
type: string
2933
default: "v2"
34+
s3_staging_subdir:
35+
description: "Staging subdirectory to push the packages"
36+
type: string
37+
default: "v2-staging"
3038
families:
3139
description: "A comma separated list of AMD GPU families, e.g. `gfx94X,gfx103x`, or empty for the default list"
3240
type: string
@@ -54,6 +62,7 @@ jobs:
5462
release_type: ${{ env.release_type }}
5563
package_targets: ${{ steps.configure.outputs.package_targets }}
5664
cloudfront_url: ${{ steps.release_information.outputs.cloudfront_url }}
65+
cloudfront_staging_url: ${{ steps.release_information.outputs.cloudfront_staging_url }}
5766
steps:
5867
- name: Checkout repository
5968
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
@@ -83,6 +92,7 @@ jobs:
8392
base_version=$(jq -r '.["rocm-version"]' version.json)
8493
echo "version=${base_version}${version_suffix}" >> $GITHUB_OUTPUT
8594
echo "cloudfront_url=${cloudfront_base_url}/${{ env.S3_SUBDIR }}" >> $GITHUB_OUTPUT
95+
echo "cloudfront_staging_url=${cloudfront_base_url}/${{ env.S3_STAGING_SUBDIR }}" >> $GITHUB_OUTPUT
8696
8797
- name: Generating package target matrix
8898
id: configure
@@ -117,6 +127,7 @@ jobs:
117127
S3_BUCKET_TAR: "therock-${{ needs.setup_metadata.outputs.release_type }}-tarball"
118128
S3_BUCKET_PY: "therock-${{ needs.setup_metadata.outputs.release_type }}-python"
119129
S3_SUBDIR: ${{ inputs.s3_subdir || 'v2' }}
130+
S3_STAGING_SUBDIR: ${{ inputs.s3_staging_subdir || 'v2-staging' }}
120131

121132
steps:
122133
- name: "Checking out repository"
@@ -242,6 +253,22 @@ jobs:
242253
aws-region: us-east-2
243254
role-to-assume: arn:aws:iam::692859939525:role/therock-${{ env.RELEASE_TYPE }}-releases
244255

256+
- name: Upload Releases to staging S3
257+
if: ${{ github.repository_owner == 'ROCm' }}
258+
run: |
259+
aws s3 cp ${{ env.OUTPUT_DIR }}/packages/dist/ s3://${{ env.S3_BUCKET_PY }}/${{ env.S3_STAGING_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}/ \
260+
--recursive --no-follow-symlinks \
261+
--exclude "*" \
262+
--include "*.whl" \
263+
--include "*.tar.gz"
264+
265+
- name: (Re-)Generate Python package release index for staging
266+
if: ${{ github.repository_owner == 'ROCm' }}
267+
run: |
268+
pip install boto3 packaging
269+
python ./build_tools/third_party/s3_management/manage.py ${{ env.S3_STAGING_SUBDIR }}/${{ matrix.target_bundle.amdgpu_family }}
270+
271+
## TODO: Restrict uploading to the non-staging S3 directory until sanity checks and all validation tests have successfully passed.
245272
- name: Upload Releases to S3
246273
if: ${{ github.repository_owner == 'ROCm' }}
247274
run: |
@@ -271,7 +298,9 @@ jobs:
271298
{ "amdgpu_family": "${{ matrix.target_bundle.amdgpu_family }}",
272299
"release_type": "${{ env.RELEASE_TYPE }}",
273300
"s3_subdir": "${{ env.S3_SUBDIR }}",
301+
"s3_staging_subdir": "${{ env.S3_STAGING_SUBDIR }}",
274302
"cloudfront_url": "${{ needs.setup_metadata.outputs.cloudfront_url }}",
303+
"cloudfront_staging_url": "${{ needs.setup_metadata.outputs.cloudfront_staging_url }}",
275304
"rocm_version": "${{ needs.setup_metadata.outputs.version }}"
276305
}
277306

.github/workflows/release_windows_pytorch_wheels.yml

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,18 @@ on:
1414
description: S3 subdirectory, not including the GPU-family
1515
type: string
1616
default: "v2"
17+
s3_staging_subdir:
18+
description: Staging subdirectory to push the wheels for test
19+
type: string
20+
default: "v2-staging"
1721
cloudfront_url:
1822
description: CloudFront URL pointing to Python index
1923
type: string
20-
default: "https://d25kgig7rdsyks.cloudfront.net/v2"
24+
default: "https://rocm.nightlies.amd.com/v2"
25+
cloudfront_staging_url:
26+
description: CloudFront base URL pointing to staging Python index
27+
required: true
28+
type: string
2129
rocm_version:
2230
description: ROCm version to pip install
2331
type: string
@@ -40,10 +48,18 @@ on:
4048
description: S3 subdirectory, not including the GPU-family
4149
type: string
4250
default: "v2"
51+
s3_staging_subdir:
52+
description: "Staging subdirectory to push the wheels for test"
53+
type: string
54+
default: "v2-staging"
4355
cloudfront_url:
4456
description: CloudFront URL pointing to Python index
4557
type: string
46-
default: "https://d25kgig7rdsyks.cloudfront.net/v2"
58+
default: "https://rocm.nightlies.amd.com/v2"
59+
cloudfront_staging_url:
60+
description: CloudFront base URL pointing to staging Python index
61+
type: string
62+
default: "https://rocm.nightlies.amd.com/v2-staging"
4763
rocm_version:
4864
description: ROCm version to pip install
4965
type: string
@@ -66,5 +82,7 @@ jobs:
6682
python_version: ${{ matrix.python_version }}
6783
release_type: ${{ inputs.release_type }}
6884
s3_subdir: ${{ inputs.s3_subdir }}
85+
s3_staging_subdir: ${{ inputs.s3_staging_subdir }}
6986
cloudfront_url: ${{ inputs.cloudfront_url }}
87+
cloudfront_staging_url: ${{ inputs.cloudfront_staging_url }}
7088
rocm_version: ${{ inputs.rocm_version }}

external-builds/pytorch/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,17 @@ mix/match build steps.
176176

177177
## Running/testing PyTorch
178178

179+
### Gating releases with PyTorch tests
180+
181+
When a build passes, we upload the PyTorch, TorchVision, TorchAudio and Triton wheels to the "v2-staging" S3 subdirectory:
182+
`https://rocm.nightlies.amd.com/v2-staging/<amdgpu_family>/` (for example, `gfx110X-dgpu`)
183+
184+
Only when the PyTorch tests pass do we promote the tested wheels to the release ("v2") S3 subdirectory:
185+
`https://rocm.nightlies.amd.com/v2/<amdgpu_family>/` (for example, `gfx110X-dgpu`)
186+
187+
If no test runner is available, promotion is blocked by default. Set `bypass_tests_for_releases=true` for the affected GPU family in `amdgpu_family_matrix.py` only in exceptional cases.
188+
(/build_tools/github_actions/amdgpu_family_matrix.py)
189+
179190
### Running ROCm and PyTorch sanity checks
180191

181192
The simplest tests for a working PyTorch with ROCm install are:

0 commit comments

Comments
 (0)