Enable Whisper Test with OMP_FFMPEG (#20402)

### Description Install OMP_FFMPEG in the Docker image and re-add the Whisper test. OMP_FFMPEG is downloaded from a restricted-access Azure blob.
microsoft · Apr 22, 2024 · 197b3f1 · 197b3f1
1 parent a457c1d
commit 197b3f1
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 5 deletions.
diff --git a/onnxruntime/python/tools/transformers/models/whisper/README.md b/onnxruntime/python/tools/transformers/models/whisper/README.md
@@ -10,14 +10,15 @@ Please note the package versions needed for using Whisper in the `requirements.t
   - Note that `torch` with CUDA enabled is not installed automatically. This is because `torch` should be installed with the CUDA version used on your machine. Please visit [the PyTorch website](https://pytorch.org/get-started/locally/) to download the `torch` version that is used with the CUDA version installed on your machine and satisfies the requirement listed in the file.
 - `requirements.txt`
   - Package versions needed in each of the above files
-- ffmpeg-python is also required, but please install it by source code with allowed codecs to avoid any patent risks.
 
 In addition to the above packages, you will need to install `ffmpeg` on your machine. Visit the [FFmpeg website](https://ffmpeg.org/) for details. You can also install it natively using package managers.
 
 - Linux: `sudo apt-get install ffmpeg`
 - MacOS: `brew install ffmpeg`
 - Windows: Download from website
 
+**FFMPEG includes numerous codecs, many of which are likely not used by your product/service. Microsoft engineering teams using FFMPEG must build FFMPEG to remove all the unneeded and unused codecs. Including codecs in your product/service, even if not used, can create patent risk for Microsoft. You are responsible for building FFMPEG in a way that follows this codec guidance.**
+
 ## Exporting Whisper with Beam Search
 
 There are several ways to export Whisper with beam search (using Whisper tiny as an example).

diff --git a/onnxruntime/python/tools/transformers/models/whisper/requirements.txt b/onnxruntime/python/tools/transformers/models/whisper/requirements.txt
@@ -1,6 +1,7 @@
 torch>=1.13.0
 transformers>=4.24.0
 openai-whisper
+ffmpeg-python
 datasets
 soundfile
 librosa

diff --git a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
@@ -352,13 +352,21 @@ stages:
         SpecificArtifact: ${{ parameters.specificArtifact }}
         BuildId: ${{ parameters.BuildId }}
 
+    - script: |
+        # Stop at the first failing command. Without this, only the exit status of
+        # the final `ls` decides whether the step passes, which hides a failed download.
+        set -e
+        # Staging directory inside the Docker build context; the image build that
+        # follows uses tools/ci_build/github/linux/docker/ as its context.
+        ffmpeg_context_dir="$(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/ompffmpeg"
+        mkdir -p "$ffmpeg_context_dir"
+        azcopy cp --recursive "https://lotusscus.blob.core.windows.net/models/ffmpeg/runtimes/linux-x64/native" "$(Agent.TempDirectory)/ompffmpeg"
+        # we need to copy the files to the docker context
+        cp "$(Agent.TempDirectory)"/ompffmpeg/native/* "$ffmpeg_context_dir/"
+        # List the staged files so the build log shows what was downloaded.
+        ls "$ffmpeg_context_dir"
+      displayName: 'Download OMP FFmpeg'
+
     - template: templates/get-docker-image-steps.yml
       parameters:
-        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu
+        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu_ffmpeg
         Context: tools/ci_build/github/linux/docker/
         ScriptName: tools/ci_build/get_docker_image.py
-        DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
-        Repository: onnxruntimepackagestest
+        DockerBuildArgs: '--build-arg BUILD_UID=$( id -u )'
+        Repository: onnxruntimepackagestest_ompffmpeg
         UpdateDepsTxt: false
 
     - task: DownloadPackage@1
@@ -376,7 +384,7 @@ stages:
         docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
            -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
            -v $(Agent.TempDirectory)/whisper_large_v3:/whisper_large_v3 \
-           onnxruntimepackagestest \
+           onnxruntimepackagestest_ompffmpeg \
             bash -c '
               set -ex; \
               pushd /workspace/onnxruntime/python/tools/transformers/ ; \
@@ -392,3 +400,35 @@ stages:
             '
       displayName: 'Convert Whisper Model'
       workingDirectory: $(Build.SourcesDirectory)
+
+    - script: |
+        # Run the ORT Whisper benchmark inside the FFmpeg-enabled image and compare
+        # its output against the checked-in expected output file.
+        # The inner shell runs with `set -e`, so a failing benchmark or a non-matching
+        # `diff` already fails the step; no explicit `exit` is needed, and the final
+        # `popd` stays reachable.
+        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
+           -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
+           -v $(Agent.TempDirectory)/whisper_large_v3:/whisper_large_v3 \
+           onnxruntimepackagestest_ompffmpeg \
+            bash -c '
+              set -ex; \
+              pushd /workspace/onnxruntime/python/tools/transformers/ ; \
+              python3 -m pip install --upgrade pip ; \
+              pushd models/whisper ; \
+              python3 -m pip install -r requirements.txt ; \
+              popd ; \
+              python3 -m pip install /ort-artifact/*.whl ; \
+              python3 -m pip uninstall -y torch ; \
+              python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
+              ls whisperlargev3; \
+              export LD_LIBRARY_PATH=/tmp/ompffmpeg:${LD_LIBRARY_PATH}; \
+              ffmpeg -version; \
+              python3 -m models.whisper.benchmark \
+                  --benchmark-type ort \
+                  --audio-path models/whisper/test/1272-141231-0002.mp3 \
+                  --model-name openai/whisper-large-v3 \
+                  --ort-model-path /workspace/onnxruntime/python/tools/transformers/whisperlargev3/whisper_large_v3_beamsearch.onnx \
+                  --precision fp32 \
+                  --device cuda > ort_output.txt ; \
+              cat ort_output.txt ; \
+              diff ort_output.txt /workspace/onnxruntime/python/tools/transformers/models/whisper/test/whisper_ort_output.txt ; \
+              popd ; \
+            '
+      displayName: 'Test Whisper ONNX Model'
+      workingDirectory: $(Build.SourcesDirectory)
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu_ffmpeg b/tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu_ffmpeg
@@ -0,0 +1,52 @@
+# --------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------
+# Dockerfile to run ONNXRuntime with TensorRT integration, with OMP FFmpeg binaries added from the build context
+
+# Build base image with required system packages
+# ARGs declared before the first FROM are only visible to FROM instructions.
+# To use their defaults inside the stage they must be re-declared (without a
+# value) after FROM, as is done for TRT_VERSION and LD_LIBRARY_PATH_ARG below.
+ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+ARG TRT_VERSION=8.6.1.6-1+cuda11.8
+ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64:/usr/local/cuda/lib64
+FROM $BASEIMAGE AS base
+ARG TRT_VERSION
+# Without this re-declaration the variable expands to an empty string and
+# LD_LIBRARY_PATH would begin with an empty entry.
+ARG LD_LIBRARY_PATH_ARG
+ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}
+ENV DEBIAN_FRONTEND=noninteractive
+
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}:${LD_LIBRARY_PATH}
+
+# Install base tools and python3 in a single layer so the package installs never
+# run against a separately cached (possibly stale) `apt-get update` layer.
+RUN apt-get update &&\
+    apt-get install -y git bash wget diffutils &&\
+    apt-get install -y --no-install-recommends \
+        python3 \
+        python3-pip \
+        python3-dev \
+        python3-wheel
+
+# Upgrade pip itself before any Python packages are installed in the image.
+RUN pip install --upgrade pip
+
+# Install TensorRT
+# Fetch the signing key from the NVIDIA CUDA repository URL, refresh the package
+# index, then install every TensorRT package pinned to TRT_VERSION.
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\
+    apt-get update &&\
+    apt-get install -y \
+        libnvinfer8=${TRT_VERSION} \
+        libnvonnxparsers8=${TRT_VERSION} \
+        libnvparsers8=${TRT_VERSION} \
+        libnvinfer-plugin8=${TRT_VERSION} \
+        libnvinfer-lean8=${TRT_VERSION} \
+        libnvinfer-vc-plugin8=${TRT_VERSION} \
+        libnvinfer-dispatch8=${TRT_VERSION} \
+        libnvinfer-headers-dev=${TRT_VERSION} \
+        libnvinfer-headers-plugin-dev=${TRT_VERSION} \
+        libnvinfer-dev=${TRT_VERSION} \
+        libnvonnxparsers-dev=${TRT_VERSION} \
+        libnvparsers-dev=${TRT_VERSION} \
+        libnvinfer-plugin-dev=${TRT_VERSION} \
+        libnvinfer-lean-dev=${TRT_VERSION} \
+        libnvinfer-vc-plugin-dev=${TRT_VERSION} \
+        libnvinfer-dispatch-dev=${TRT_VERSION} \
+        python3-libnvinfer=${TRT_VERSION} \
+        libnvinfer-samples=${TRT_VERSION} \
+        tensorrt-dev=${TRT_VERSION} \
+        tensorrt-libs=${TRT_VERSION}
+
+# Bring the helper scripts into the image, run install_dotnet.sh from that
+# directory (presumably installs .NET; the script is not shown here), and
+# delete the scripts afterwards.
+ADD scripts /tmp/scripts
+RUN cd /tmp/scripts \
+    && /tmp/scripts/install_dotnet.sh \
+    && rm -rf /tmp/scripts
+
+# Copy the OMP FFmpeg files staged in the build context and expose the ffmpeg and
+# ffprobe binaries on PATH through /usr/local/bin.
+COPY ompffmpeg /tmp/ompffmpeg/
+# Every command is chained with && so a missing binary fails the build at chmod
+# instead of leaving dangling symlinks behind.
+RUN chmod +x /tmp/ompffmpeg/ffmpeg /tmp/ompffmpeg/ffprobe && \
+    ln -s /tmp/ompffmpeg/ffmpeg /usr/local/bin/ffmpeg && \
+    ln -s /tmp/ompffmpeg/ffprobe /usr/local/bin/ffprobe
+
+# Final stage: the base stage plus an unprivileged user to run as.
+FROM base AS final
+# User name and UID are overridable at build time via --build-arg.
+ARG BUILD_USER=onnxruntimedev
+ARG BUILD_UID=1000
+# Create the user, make its home directory the working directory, and switch to it.
+RUN adduser --uid ${BUILD_UID} ${BUILD_USER}
+WORKDIR /home/${BUILD_USER}
+USER ${BUILD_USER}