Skip to content

Commit

Permalink
Update training packaging pipeline's docker files (#20853)
Browse files Browse the repository at this point in the history
### Description
Similar to #20786. The last PR was not able to update all pipelines and all
docker files. This is a follow-up to that PR.

### Motivation and Context
1. Extract the common parts into reusable build infrastructure shared across
different ONNX Runtime projects.
2. Avoid hitting Docker Hub's pull rate limit, which fails builds with:
"429 Too Many Requests - Server message: toomanyrequests: You have reached
your pull rate limit. You may increase the limit by authenticating and
upgrading: https://www.docker.com/increase-rate-limit"
  • Loading branch information
snnn authored May 31, 2024
1 parent 00589f5 commit 67bc943
Show file tree
Hide file tree
Showing 42 changed files with 229 additions and 958 deletions.
10 changes: 2 additions & 8 deletions tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,6 @@ parameters:

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

- repository: LLaMa2Onnx
type: Github
endpoint: Microsoft
Expand All @@ -55,7 +49,7 @@ resources:
variables:
- template: templates/common-variables.yml
- name: docker_base_image
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20240530.3
- name: linux_trt_version
value: 10.0.1.6-1.cuda11.8
- name: Repository
Expand Down Expand Up @@ -113,7 +107,7 @@ stages:
inputs:
script: |
mkdir -p $HOME/.onnx
docker run -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
# Note: `-e` consumes the next argument as its value, so `--rm` must not follow a bare `-e`.
docker run --rm \
--volume /data/onnx:/data/onnx:ro \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,6 @@ variables:
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: 10.0.1.6-1.cuda11.8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: 10.0.1.6-1.cuda12.4
- name: win_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: 11.8
Expand Down Expand Up @@ -184,7 +179,6 @@ stages:
DoCompliance: ${{ parameters.DoCompliance }}
CudaVersion: ${{ parameters.CudaVersion }}
docker_base_image: ${{ variables.docker_base_image }}
linux_trt_version: ${{ variables.linux_trt_version }}
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }}
win_trt_home: ${{ variables.win_trt_home }}
Expand Down Expand Up @@ -228,6 +222,7 @@ stages:
--build-arg PREPEND_PATH=/opt/rh/gcc-toolset-12/root/usr/bin:
--build-arg LD_LIBRARY_PATH_ARG=/opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib
Repository: onnxruntimetrainingrocmbuild-rocm$(RocmVersion)
CheckOutManyLinux: true

- template: templates/set-version-number-variables-step.yml

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,6 @@ variables:
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: 10.0.1.6-1.cuda11.8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: 10.0.1.6-1.cuda12.4
- name: win_trt_home
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: $(Agent.TempDirectory)\TensorRT-10.0.1.6.Windows10.x86_64.cuda-11.8
Expand Down Expand Up @@ -120,7 +115,6 @@ stages:
DoCompliance: ${{ parameters.DoCompliance }}
CudaVersion: ${{ parameters.CudaVersion }}
docker_base_image: ${{ variables.docker_base_image }}
linux_trt_version: ${{ variables.linux_trt_version }}
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }}
win_trt_home: ${{ variables.win_trt_home }}
Expand Down
8 changes: 1 addition & 7 deletions tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py ####
trigger:
branches:
Expand Down Expand Up @@ -27,13 +28,6 @@ pr:
- 'onnxruntime/core/providers/js'
#### end trigger ####

resources:
repositories:
- repository: manylinux # The name used to reference this repository in the checkout step
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7
stages:
- stage: x64
dependsOn: []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,6 @@ pr:
- BUILD.md
- 'js/web'
- 'onnxruntime/core/providers/js'
resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

jobs:
- job: Linux_CPU_Minimal_Build_E2E
timeoutInMinutes: 120
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,6 @@ pr:
- 'onnxruntime/core/providers/js'
#### end trigger ####

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

jobs:
- job: Linux_py_Wheels
timeoutInMinutes: 180
Expand Down
67 changes: 8 additions & 59 deletions tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,26 +45,12 @@ parameters:
type: string
default: '0'

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

variables:
- name: docker_base_image
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8

- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: 10.0.1.6-1.cuda11.8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20240530.3
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: 10.0.1.6-1.cuda12.4
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_2_x64_ubi8_gcc12:20240530.3

- name: Repository
${{ if eq(parameters.CudaVersion, '11.8') }}:
Expand Down Expand Up @@ -97,12 +83,7 @@ stages:
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "
--network=host
--build-arg BASEIMAGE=$(docker_base_image)
--build-arg TRT_VERSION=$(linux_trt_version)
--build-arg BUILD_UID=$( id -u )
"
DockerBuildArgs: "--build-arg BASEIMAGE=$(docker_base_image) --build-arg BUILD_UID=$( id -u )"
Repository: $(Repository)

- task: Cache@2
Expand All @@ -123,7 +104,7 @@ stages:
- script: |
set -e -x
mkdir -p $HOME/.onnx
docker run -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
docker run --rm \
--volume /data/onnx:/data/onnx:ro \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
Expand All @@ -133,39 +114,12 @@ stages:
-e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
-e CCACHE_DIR=/cache \
$(Repository) \
/bin/bash -c "
set -ex; \
env; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator Ninja \
--config Release --update --build \
--skip_submodule_sync \
--build_shared_lib \
--parallel --use_binskim_compliant_compile_flags \
--build_wheel \
--enable_onnx_tests --use_cuda --cuda_version=${{parameters.CudaVersion}} --cuda_home=/usr/local/cuda-${{parameters.CudaVersion}} --cudnn_home=/usr/local/cuda-${{parameters.CudaVersion}} \
--enable_cuda_profiling --enable_cuda_nhwc_ops \
--enable_pybind --build_java \
--use_cache \
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75 \
--cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=ON \
--cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON; \
ccache -sv; \
ccache -z"
-e CCACHE_DIR=/cache -w /onnxruntime_src \
$(Repository) tools/ci_build/github/linux/build_cuda_ci.sh
workingDirectory: $(Build.SourcesDirectory)
displayName: Build Onnxruntime
- task: CmdLine@2
inputs:
script: |
rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
rm -f $(Build.BinariesDirectory)/Release/models
find $(Build.BinariesDirectory)/Release/_deps -mindepth 1 ! -regex '^$(Build.BinariesDirectory)/Release/_deps/onnx-src\(/.*\)?' -delete
cd $(Build.BinariesDirectory)/Release
find -executable -type f > $(Build.BinariesDirectory)/Release/perms.txt
- script: $(Build.SourcesDirectory)/tools/ci_build/github/linux/delete_unused_files_before_upload.sh

- task: PublishPipelineArtifact@0
displayName: 'Publish Pipeline Artifact'
Expand Down Expand Up @@ -203,12 +157,7 @@ stages:
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "
--network=host
--build-arg BASEIMAGE=$(docker_base_image)
--build-arg TRT_VERSION=$(linux_trt_version)
--build-arg BUILD_UID=$( id -u )
"
DockerBuildArgs: "--build-arg BASEIMAGE=$(docker_base_image) --build-arg BUILD_UID=$( id -u )"
Repository: $(Repository)

- task: CmdLine@2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,19 @@ parameters:
values:
- 11.8
- 12.2
- 12.4
resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

variables:
- name: docker_base_image
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20240530.3
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
${{ if eq(parameters.CudaVersion, '12.4') }}:
value: nvidia/cuda:12.4.1-cudnn-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_2_x64_ubi8_gcc12:20240530.3
- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: 10.0.1.6-1.cuda11.8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: 10.0.1.6-1.cuda12.4
${{ if eq(parameters.CudaVersion, '12.4') }}:
value: 10.0.1.6-1.cuda12.4

jobs:
- job: Linux_Build
timeoutInMinutes: 180
Expand Down Expand Up @@ -99,7 +89,7 @@ jobs:
- task: CmdLine@2
inputs:
script: |
docker run --gpus all -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
docker run --gpus all --rm \
--volume /data/onnx:/data/onnx:ro \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
Expand All @@ -109,26 +99,8 @@ jobs:
-e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
-e CCACHE_DIR=/cache \
onnxruntimetensorrt86gpubuild \
/bin/bash -c "
set -ex; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator Ninja \
--config Release \
--skip_submodule_sync \
--build_shared_lib \
--parallel --use_binskim_compliant_compile_flags \
--build_wheel \
--enable_onnx_tests \
--use_cuda --cuda_home=/usr/local/cuda-${{ parameters.CudaVersion }} --cudnn_home=/usr/lib64/ \
--enable_pybind --build_java \
--use_tensorrt --tensorrt_home /usr \
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75 \
--use_cache; \
ccache -sv; \
ccache -z"
-e CCACHE_DIR=/cache -w /onnxruntime_src \
onnxruntimetensorrt86gpubuild tools/ci_build/github/linux/build_tensorrt_ci.sh
workingDirectory: $(Build.SourcesDirectory)

- template: templates/explicitly-defined-final-tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ jobs:
--build-arg PREPEND_PATH=/opt/rh/gcc-toolset-12/root/usr/bin:
--build-arg LD_LIBRARY_PATH_ARG=/opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib
Repository: onnxruntimetrainingrocm-cibuild-rocm$(RocmVersion)-manylinux-build
CheckOutManyLinux: true

- task: Cache@2
inputs:
Expand Down
Loading

0 comments on commit 67bc943

Please sign in to comment.