Add Whisper model in CI (#19604)
### Description
Add Whisper ONNX conversion and an end-to-end test to the Big Models pipeline.

### Motivation and Context
Extends CI coverage to Whisper: the pipeline converts openai/whisper-large-v3 to ONNX and verifies the transcription produced by the exported model against a checked-in reference output.
---------

Co-authored-by: Your Name <[email protected]>
Co-authored-by: kunal-vaishnavi <[email protected]>
3 people authored Feb 25, 2024
1 parent c980149 commit 0fcc6fb
Showing 7 changed files with 115 additions and 8 deletions.
4 changes: 2 additions & 2 deletions onnxruntime/python/tools/transformers/benchmark_helper.py
@@ -589,7 +589,7 @@ def measure_memory(is_gpu, func, monitor_type="cuda", start_memory=None):
     if max_usage is None:
         return None

-    print(f"GPU memory usage: before={memory_before_test} peak={max_usage}")
+    logger.info(f"GPU memory usage: before={memory_before_test} peak={max_usage}")
     if len(memory_before_test) >= 1 and len(max_usage) >= 1 and len(memory_before_test) == len(max_usage):
         # When there are multiple GPUs, we will check the one with maximum usage.
         max_used = 0
@@ -620,7 +620,7 @@ def measure_memory(is_gpu, func, monitor_type="cuda", start_memory=None):
         monitor.keep_measuring = False
         max_usage = mem_thread.result()

-    print(f"CPU memory usage: before={memory_before_test:.1f} MB, peak={max_usage:.1f} MB")
+    logger.info(f"CPU memory usage: before={memory_before_test:.1f} MB, peak={max_usage:.1f} MB")
     return max_usage - memory_before_test
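A quick sketch of how this reporting path is exercised; the `measure_memory` signature is taken from the hunk above, while the workload and logging setup are illustrative. Because the report now goes through `logger.info` rather than `print`, callers only see it once logging is configured:

```python
# Illustrative only: assumes benchmark_helper's measure_memory(is_gpu, func, ...) as shown above.
import logging

from benchmark_helper import measure_memory

# The memory report is now emitted via logger.info, so it is silent
# unless the application configures logging.
logging.basicConfig(level=logging.INFO)


def workload():
    data = [0] * (10**7)  # stand-in for model inference
    return len(data)


# Returns peak usage minus the baseline measured before the workload ran.
delta = measure_memory(is_gpu=False, func=workload)
```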
onnxruntime/python/tools/transformers/models/whisper/benchmark.py
@@ -410,7 +410,8 @@ def handle_output(output):
         actual_output = handle_output(ort_outputs[0][0])
         logger.info(f"Generated token length: {len(actual_output)} tokens")
         transcription = args.processor.batch_decode(ort_outputs[0], skip_special_tokens=True)[0]
-        logger.info(f"Transcription: {transcription}")
+        # print to stdout as the output for comparison
+        print(f"{transcription}")

     measure_fn(args, generate_fn, ort_inputs)
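Why `print` rather than `logger.info` here: the E2E step below redirects stdout to `ort_output.txt` and byte-compares it against `models/whisper/test/whisper_ort_output.txt`, so the transcription must be the only thing on stdout. A minimal sketch of that contract (names are illustrative; Python logging writes to stderr by default):

```python
import logging

# logging goes to stderr by default, so diagnostics stay out of the redirected stdout
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

transcription = "the cut on his chest still dripping blood ..."  # decoded text (truncated)
logger.info(f"Transcription length: {len(transcription)} chars")  # diagnostic only
print(transcription)  # the one line captured by `... > ort_output.txt` for the diff
```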
onnxruntime/python/tools/transformers/models/whisper/requirements.txt
@@ -8,4 +8,7 @@ librosa
 optimum
 onnxruntime-extensions>=0.9.0
 protobuf==3.20.2
-numpy==1.23.3
+numpy==1.23.3
+onnx>=1.15.0
+psutil
+py3nvml
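The three new entries support the export and measurement paths: `onnx` for the model export itself, `psutil` for CPU memory polling, and `py3nvml` for GPU memory polling in `benchmark_helper`. A minimal sketch of the two memory probes (assumes an NVIDIA GPU is present for the py3nvml part):

```python
import psutil
from py3nvml.py3nvml import (
    nvmlDeviceGetHandleByIndex,
    nvmlDeviceGetMemoryInfo,
    nvmlInit,
    nvmlShutdown,
)

# CPU side: resident set size of the current process, as psutil reports it.
rss_mb = psutil.Process().memory_info().rss / 2**20
print(f"CPU RSS: {rss_mb:.1f} MB")

# GPU side: used memory on device 0 via NVML.
nvmlInit()
mem = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(0))
print(f"GPU used: {mem.used / 2**20:.1f} MB")
nvmlShutdown()
```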
onnxruntime/python/tools/transformers/models/whisper/test/1272-141231-0002.mp3
Binary file not shown.
onnxruntime/python/tools/transformers/models/whisper/test/whisper_ort_output.txt
@@ -0,0 +1 @@
+the cut on his chest still dripping blood the ache of his overstrained eyes even the soaring arena around him with the thousands of spectators were trivialities not worth thinking about
101 changes: 100 additions & 1 deletion tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
@@ -314,12 +314,111 @@ stages:
             pushd /workspace/onnxruntime/python/tools/transformers/ ; \
             python3 -m pip install --upgrade pip ; \
             pushd models/llama ; \
-            python3 -m pip install -r requirements-cuda.txt ; \
+            python3 -m pip install -r requirements.txt ; \
             popd ; \
             python3 -m pip install /ort-artifact/*.whl ; \
             python3 -m pip uninstall -y torch ; \
             python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
             python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --input /meta-llama2 --small_gpu ;\
             popd ; \
           "
       displayName: 'Run Llama2 to Onnx F16 and parity Test'
       workingDirectory: $(Build.SourcesDirectory)
+
+- stage: Whisper_ONNX
+  dependsOn:
+  - Build_Onnxruntime_Cuda
+  jobs:
+  - job: Whisper_ONNX
+    variables:
+      skipComponentGovernanceDetection: true
+    workspace:
+      clean: all
+    pool: Onnxruntime-Linux-A10-24G
+    steps:
+    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
+      displayName: 'Clean Agent Directories'
+      condition: always()
+
+    - checkout: self
+      clean: true
+      submodules: none
+
+    - template: templates/flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download Onnxruntime Artifact'
+        ArtifactName: 'drop-ort-linux-gpu'
+        TargetPath: '$(Build.BinariesDirectory)/ort-artifact/'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
+
+    - template: templates/get-docker-image-steps.yml
+      parameters:
+        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu
+        Context: tools/ci_build/github/linux/docker/
+        ScriptName: tools/ci_build/get_docker_image.py
+        DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
+        Repository: onnxruntimepackagestest
+        UpdateDepsTxt: false
+
+    - task: DownloadPackage@1
+      # The model data in artifact is downloaded from openai/whisper-large-v3 in huggingface model hub
+      # In order to save size, removed .git directory and pickled files, and keep the safetensors model files
+      displayName: 'Download Whisper Model'
+      inputs:
+        packageType: upack
+        feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
+        version: 1.0.0
+        definition: 'b583ce7c-1a8f-4099-ae28-5d5f56c478b1'
+        downloadPath: $(Agent.TempDirectory)/whisper_large_v3
+
+    - script: |
+        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
+          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
+          -v $(Agent.TempDirectory)/whisper_large_v3:/whisper_large_v3 \
+          onnxruntimepackagestest \
+          bash -c '
+            set -ex; \
+            pushd /workspace/onnxruntime/python/tools/transformers/ ; \
+            python3 -m pip install --upgrade pip ; \
+            pushd models/whisper ; \
+            python3 -m pip install -r requirements.txt ; \
+            popd ; \
+            python3 -m pip install /ort-artifact/*.whl ; \
+            python3 -m pip uninstall -y torch ; \
+            python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
+            python3 -m models.whisper.convert_to_onnx -m /whisper_large_v3 --output whisperlargev3 --use_external_data_format ; \
+            popd ; \
+          '
+      displayName: 'Convert Whisper Model'
+      workingDirectory: $(Build.SourcesDirectory)
+
+    - script: |
+        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
+          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
+          -v $(Agent.TempDirectory)/whisper_large_v3:/whisper_large_v3 \
+          onnxruntimepackagestest \
+          bash -c '
+            set -ex; \
+            pushd /workspace/onnxruntime/python/tools/transformers/ ; \
+            python3 -m pip install --upgrade pip ; \
+            pushd models/whisper ; \
+            python3 -m pip install -r requirements.txt ; \
+            popd ; \
+            python3 -m pip install /ort-artifact/*.whl ; \
+            python3 -m pip uninstall -y torch ; \
+            python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
+            ls whisperlargev3; \
+            python3 -m models.whisper.benchmark \
+              --benchmark-type ort \
+              --audio-path models/whisper/test/1272-141231-0002.mp3 \
+              --model-name openai/whisper-large-v3 \
+              --ort-model-path /workspace/onnxruntime/python/tools/transformers/whisperlargev3/whisper_large_v3_beamsearch.onnx \
+              --precision fp32 \
+              --device cuda > ort_output.txt ; \
+            cat ort_output.txt ; \
+            diff ort_output.txt /workspace/onnxruntime/python/tools/transformers/models/whisper/test/whisper_ort_output.txt && exit 0 || exit 1
+            popd ; \
+          '
+      displayName: 'Test Whisper ONNX Model'
+      workingDirectory: $(Build.SourcesDirectory)
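The final `diff ... && exit 0 || exit 1` line is the actual pass/fail gate: the step succeeds only if the benchmark's stdout matches the checked-in transcription exactly. An equivalent check in Python, for reproducing the E2E comparison outside the pipeline (paths as used inside the container above):

```python
import pathlib
import sys

actual = pathlib.Path("ort_output.txt").read_text()
expected = pathlib.Path(
    "/workspace/onnxruntime/python/tools/transformers/models/whisper/test/whisper_ort_output.txt"
).read_text()

# Mirror `diff ... && exit 0 || exit 1`: any difference fails the step.
sys.exit(0 if actual == expected else 1)
```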
tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu
@@ -16,15 +16,18 @@ ENV DEBIAN_FRONTEND=noninteractive
 ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}:${LD_LIBRARY_PATH}

 RUN apt-get update &&\
-    apt-get install -y git bash wget
+    apt-get install -y git bash wget diffutils

 # Install python3
 RUN apt-get install -y --no-install-recommends \
     python3 \
     python3-pip \
     python3-dev \
-    python3-wheel
-
+    python3-wheel
+
+# Install ffmpeg, which couldn't be installed in UBI8
+# https://stackoverflow.com/questions/73597789/how-to-install-ffmpeg-on-ubi-docker-images
+RUN apt-get install -y --no-install-recommends ffmpeg

 RUN pip install --upgrade pip
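`ffmpeg` is needed (alongside `diffutils` for the diff gate above) because the benchmark decodes the mp3 test clip through librosa, which falls back to ffmpeg/audioread for compressed formats. A small sketch of the load the E2E step depends on (path relative to the transformers tools directory; 16 kHz is Whisper's expected sample rate):

```python
import librosa

# Decode the CI test clip; librosa relies on ffmpeg/audioread for mp3 input.
audio, sr = librosa.load("models/whisper/test/1272-141231-0002.mp3", sr=16000)
print(f"{audio.shape[0] / sr:.1f} s of audio at {sr} Hz")
```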
