bigmodel pipeline update cp38 to cp310 (#22793)
### Description
When updating the bigmodel pipeline from cp38 to cp310, two jobs failed: stable_diffusion and whisper.

1. The stable_diffusion job uses "nvcr.io/nvidia/pytorch:22.11-py3" from the NVIDIA registry, which ships CUDA 11 with Python 3.8. NVIDIA does not provide a Python 3.10 variant of this image for CUDA 11; the latest tag targets CUDA 12 with Python 3.10. To solve this, the job now builds an Ubuntu 22.04 based docker image and installs all the Python packages it needs.
2. The whisper job used an Ubuntu 20.04 based docker image, which does not ship Python 3.10, so it is updated to Ubuntu 22.04.
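
For reference, a minimal sketch of how to confirm the Python versions shipped by the two base images (the expected versions in the comments are assumptions based on the image tags, not output captured from the CI):

```bash
# NVIDIA PyTorch image used before this change: CUDA 11 based, expected to report Python 3.8.x.
docker run --rm nvcr.io/nvidia/pytorch:22.11-py3 python3 --version

# Plain CUDA 11.8 image on Ubuntu 22.04: the distro python3 package is expected to be 3.10.x.
docker run --rm nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 \
  bash -c "apt-get update -qq && apt-get install -y -qq python3 > /dev/null && python3 --version"
```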
kailums authored Nov 21, 2024
1 parent 369d7bf commit 1e605be
Showing 6 changed files with 89 additions and 11 deletions.
@@ -10,6 +10,10 @@ packaging
protobuf==3.20.3
psutil
sympy
nvtx==0.2.5
torchvision==0.15.2
tensorrt==8.5.1.7
mediapipe
controlnet_aux==0.0.9
# The following are for SDXL
optimum==1.20.0
@@ -2,3 +2,4 @@ git+https://github.com/openai/CLIP.git
open_clip_torch
sentence_transformers
pillow
numpy==1.22.2
27 changes: 18 additions & 9 deletions tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
@@ -115,7 +115,7 @@ stages:
set -ex; \
env; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
/opt/python/cp310-cp310/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator Ninja \
--config Release --update --build \
--skip_submodule_sync \
@@ -180,6 +180,17 @@ stages:
TargetPath: '$(Build.BinariesDirectory)/Release'
SpecificArtifact: ${{ parameters.specificArtifact }}
BuildId: ${{ parameters.BuildId }}
- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2204_gpu_opencv
Context: tools/ci_build/github/linux/docker/
ScriptName: tools/ci_build/get_docker_image.py
DockerBuildArgs: "
--build-arg BUILD_UID=$( id -u )
"
Repository: onnxruntimeubuntupackagestest_cuda11
UseImageCacheContainerRegistry: false
UpdateDepsTxt: false

- task: Cache@2
inputs:
@@ -196,18 +207,15 @@
-v $(Build.BinariesDirectory)/Release:/Release \
-v $(STABLE_DIFFUSION_MODEL_CACHE):/model_cache:rw \
-v $(GenerateImage_DIR):/images:rw \
nvcr.io/nvidia/pytorch:22.11-py3 \
onnxruntimeubuntupackagestest_cuda11 \
bash -c ' \
set -ex; \
pip uninstall -y $(pip list --format=freeze | grep opencv); \
rm -rf /usr/local/lib/python3.8/dist-packages/cv2/; \
apt-get update; \
DEBIAN_FRONTEND="noninteractive" apt-get install --yes python3-opencv; \
python3 --version; \
python3 -m pip install --upgrade pip; \
python3 -m pip install /Release/*.whl; \
pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion; \
python3 -m pip install -r requirements/cuda11/requirements.txt; \
python3 -m pip install numpy==1.22.2; \
python3 -m pip install --upgrade polygraphy onnx-graphsurgeon ; \
echo Generate an image guided by a text prompt; \
python3 demo_txt2img.py --framework-model-dir /model_cache --seed 1 --deterministic "astronaut riding a horse on mars" ; \
@@ -238,7 +246,7 @@ stages:
- script: |
docker run --rm --gpus all -v $PWD:/workspace \
-v $(CLIP_MODEL_CACHE):/model_cache:rw \
nvcr.io/nvidia/pytorch:22.11-py3 \
onnxruntimeubuntupackagestest_cuda11 \
bash -c '
set -x; \
python3 --version; \
@@ -265,14 +273,15 @@
- script: |
docker run --rm --gpus all -v $PWD:/workspace \
-v $(CLIP_MODEL_CACHE):/model_cache:rw \
nvcr.io/nvidia/pytorch:22.11-py3 \
onnxruntimeubuntupackagestest_cuda11 \
bash -c '
set -ex; \
python3 --version; \
python3 -m pip install --upgrade pip; \
pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion/; \
image2=$(find $(pwd) -name "astronaut_riding_a_h*.png") ; \
pushd test; \
python3 -m pip install numpy==1.22.2; \
python3 -m pip install -r requirements.txt; \
echo check demo_txt2image.py generate image; \
python3 -u check_image.py --image1 astronaut_riding_txt2image-DDIM-50.png --image2 $image2 --cache_dir /model_cache ; \
@@ -438,7 +447,7 @@ stages:
- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu_ffmpeg
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2204_gpu_ffmpeg
Context: tools/ci_build/github/linux/docker/
ScriptName: tools/ci_build/get_docker_image.py
DockerBuildArgs: '--build-arg BUILD_UID=$( id -u )'
@@ -9,7 +9,7 @@ ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
ARG TRT_VERSION=10.6.0.26-1.cuda11.8
FROM $BASEIMAGE AS base
ARG TRT_VERSION
ENV PATH=/opt/python/cp38-cp38/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}
ENV PATH=/opt/python/cp310-cp310/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}

RUN dnf install -y bash wget &&\
dnf clean dbcache
@@ -5,7 +5,7 @@
# Dockerfile to run ONNXRuntime with TensorRT integration

# Build base image with required system packages
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
ARG TRT_VERSION=10.6.0.26-1+cuda11.8
ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64:/usr/local/cuda/lib64
FROM $BASEIMAGE AS base
@@ -0,0 +1,64 @@
# --------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------
# Dockerfile to run ONNXRuntime with TensorRT integration

# Build base image with required system packages
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
ARG TRT_VERSION=10.6.0.26-1+cuda11.8
ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64:/usr/local/cuda/lib64
FROM $BASEIMAGE AS base
ARG TRT_VERSION
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}
ENV DEBIAN_FRONTEND=noninteractive

ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}:${LD_LIBRARY_PATH}

RUN apt-get update &&\
apt-get install -y git bash wget diffutils

RUN DEBIAN_FRONTEND="noninteractive" apt-get install --yes python3-opencv

# Install python3
RUN apt-get install -y --no-install-recommends \
python3 \
python3-pip \
python3-dev \
python3-wheel

RUN pip install --upgrade pip

# Install TensorRT
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\
apt-get update &&\
apt-get install -y \
libnvinfer-dev=${TRT_VERSION} \
libnvinfer-dispatch-dev=${TRT_VERSION} \
libnvinfer-dispatch10=${TRT_VERSION} \
libnvinfer-headers-dev=${TRT_VERSION} \
libnvinfer-headers-plugin-dev=${TRT_VERSION} \
libnvinfer-lean-dev=${TRT_VERSION} \
libnvinfer-lean10=${TRT_VERSION} \
libnvinfer-plugin-dev=${TRT_VERSION} \
libnvinfer-plugin10=${TRT_VERSION} \
libnvinfer-vc-plugin-dev=${TRT_VERSION} \
libnvinfer-vc-plugin10=${TRT_VERSION} \
libnvinfer10=${TRT_VERSION} \
libnvonnxparsers-dev=${TRT_VERSION} \
libnvonnxparsers10=${TRT_VERSION} \
tensorrt-dev=${TRT_VERSION} \
libnvinfer-bin=${TRT_VERSION} &&\
if [ $(echo $CUDA_VERSION | cut -d"." -f1) -ge 12 ]; then apt-get install -y cudnn9-cuda-12 ; fi
# ^^^^^^^^^^^If cuda version is 12 or higher, install cudnn 9 for cuda 12

ADD scripts /tmp/scripts
RUN cd /tmp/scripts && /tmp/scripts/install_dotnet.sh && rm -rf /tmp/scripts

# Build final image from base.
FROM base as final
ARG BUILD_USER=onnxruntimedev
ARG BUILD_UID=1000
RUN adduser --uid $BUILD_UID $BUILD_USER
WORKDIR /home/$BUILD_USER
USER $BUILD_USER
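
For a rough local check of the new image (assuming this is the Dockerfile.package_ubuntu_2204_gpu_opencv referenced in the pipeline above; the build roughly mirrors what the get-docker-image-steps template does via get_docker_image.py, and the tag is only illustrative):

```bash
# Build the new Ubuntu 22.04 / CUDA 11.8 image with the same BUILD_UID argument the pipeline passes.
docker build \
  --build-arg BUILD_UID=$(id -u) \
  -f tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2204_gpu_opencv \
  -t onnxruntimeubuntupackagestest_cuda11 \
  tools/ci_build/github/linux/docker/

# Smoke test: an Ubuntu 22.04 base should report Python 3.10.x.
docker run --rm onnxruntimeubuntupackagestest_cuda11 python3 --version
```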
