16 changes: 8 additions & 8 deletions .aitk/configs/checks.json
@@ -1,16 +1,16 @@
 {
-  "configCheck": 127,
+  "configCheck": 128,
   "copyCheck": 182,
   "extensionCheck": 1,
-  "gitignoreCheck": 37,
+  "gitignoreCheck": 38,
   "inferenceModelCheck": 25,
-  "ipynbCheck": 37,
-  "licenseCheck": 36,
-  "modelProjectCheck": 38,
+  "ipynbCheck": 38,
+  "licenseCheck": 37,
+  "modelProjectCheck": 39,
   "oliveCheck": 36,
-  "oliveJsonCheck": 127,
-  "pathCheck": 992,
+  "oliveJsonCheck": 128,
+  "pathCheck": 1005,
   "requirementsCheck": 37,
   "templateCheck": 1,
-  "venvRequirementsCheck": 10
+  "venvRequirementsCheck": 11
 }
16 changes: 15 additions & 1 deletion .aitk/configs/model_list.json
@@ -464,6 +464,19 @@
       "version": 1,
       "p0": false
     },
+    {
+      "displayName": "openai/whisper-large-v3-turbo",
+      "icon": "OpenAI",
+      "modelLink": "https://huggingface.co/openai/whisper-large-v3-turbo",
+      "id": "huggingface/openai/whisper-large-v3-turbo",
+      "runtimes": [
+        "QNN"
+      ],
+      "architecture": "Transformer",
+      "status": "Hide",
+      "relativePath": "openai-whisper-large-v3-turbo/aitk",
+      "version": 1
+    },
     {
       "displayName": "Qwen/Qwen2.5-0.5B",
       "icon": "qwen",
@@ -666,7 +679,8 @@
     "timm/mini-imagenet": "https://huggingface.co/datasets/timm/mini-imagenet",
     "wikipedia": "https://huggingface.co/datasets/wikimedia/wikipedia",
     "google-research-datasets/conceptual_captions": "https://huggingface.co/datasets/google-research-datasets/conceptual_captions",
-    "AIMClab-RUC/COCO-CN": "https://huggingface.co/datasets/AIMClab-RUC/COCO-CN"
+    "AIMClab-RUC/COCO-CN": "https://huggingface.co/datasets/AIMClab-RUC/COCO-CN",
+    "librispeech_asr": "https://huggingface.co/datasets/openslr/librispeech_asr"
   },
   "LoginRequiredDatasets": [
     "imagenet-1k"
9 changes: 9 additions & 0 deletions .aitk/requirements/requirements-WCR-QAI.txt
@@ -0,0 +1,9 @@
gdown==5.2.0
gitpython==3.1.46
librosa==0.11.0
qai_hub==0.42.0
ruamel-yaml==0.19.1
schema==0.7.8
sounddevice==0.5.2
# need to install without deps because it depends on onnxruntime
# uv pip:install qai_hub_models==0.39.1 --no-deps;post
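
In a plain environment, that last directive appears to correspond to running `uv pip install qai_hub_models==0.39.1 --no-deps` as a post-install step (the `;post` suffix presumably marks it as such); installing with `--no-deps` avoids pulling in the `onnxruntime` wheel that `qai_hub_models` declares as a dependency.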
3 changes: 2 additions & 1 deletion .aitk/scripts/sanitize/constants.py
@@ -32,7 +32,6 @@ class ArchitectureEnum(Enum):
 
 class ModelStatusEnum(Enum):
     Ready = "Ready"
-    Coming = "Coming"
     Hide = "Hide"
 
 
@@ -107,6 +106,7 @@ class OliveDeviceTypes(Enum):
 # Pass name is case insensitive, so we use lower case for all pass names
 # Should sort by value
 class OlivePassNames:
+    AitkPython = "aitkpython"
     ModelBuilder = "modelbuilder"
     NVModelOptQuantization = "nvmodeloptquantization"
     OnnxFloatToFloat16 = "onnxfloattofloat16"
@@ -160,6 +160,7 @@ class OlivePropertyNames:
     TargetDevice = "target_device"
     Type = "type"
     UserConfig = "user_config"
+    UserScript = "user_script"
     WeightFormat = "weight_format"
 
 
5 changes: 4 additions & 1 deletion .aitk/scripts/sanitize/file_validation.py
@@ -172,8 +172,11 @@ def readCheckIpynb(ipynbFile: str, modelItems: dict[str, ModelParameter]):
             importStr = importOnnxgenairuntime
         elif modelParameter.runtime.values and modelParameter.isIntel:
             testPath = outputModelIntelNPURelativePath
+        elif modelParameter.aitkPython:
+            testPath = None
+            importStr = None
         for item in [testPath, importStr]:
-            if not re.search(item, ipynbContent):
+            if item and not re.search(item, ipynbContent):
                 printError(f"{ipynbFile} does not have '{item}' for {name}, please use it as input")
         if modelParameter.evalRuntime:
             runtime = GlobalVars.RuntimeToEPName[modelParameter.evalRuntime]
49 changes: 37 additions & 12 deletions .aitk/scripts/sanitize/generator_common.py
@@ -34,6 +34,25 @@ def create_model_parameter(aitk, name: str, configFile: Path):
     return parameter
 
 
+def add_optimization_wa(optimizationPaths: list[OptimizationPath], k: str, v: dict) -> bool:
+    if OlivePropertyNames.Precision in v:
+        optimizationPaths.append(
+            OptimizationPath(
+                name="WeightType",
+                path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.Precision}",
+            )
+        )
+        # We require both weight and activation type for quantization
+        optimizationPaths.append(
+            OptimizationPath(
+                name="ActivationType",
+                path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.ActivationType}",
+            )
+        )
+        return True
+    return False
+
+
 def set_optimization_path(parameter: ModelParameter, configFile: str):
     parameter.optimizationPaths = []
     with open_ex(configFile, "r") as f:
@@ -64,18 +83,7 @@ def set_optimization_path(parameter: ModelParameter, configFile: str):
                     OlivePassNames.OnnxStaticQuantization,
                     OlivePassNames.OnnxDynamicQuantization,
                 ]:
-                    parameter.optimizationPaths.append(
-                        OptimizationPath(
-                            name="WeightType",
-                            path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.Precision}",
-                        )
-                    )
-                    parameter.optimizationPaths.append(
-                        OptimizationPath(
-                            name="ActivationType",
-                            path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.ActivationType}",
-                        )
-                    )
+                    add_optimization_wa(parameter.optimizationPaths, k, v)
                     return
                 elif vType == OlivePassNames.OnnxFloatToFloat16:
                     parameter.optimizationPaths.append(
@@ -85,3 +93,20 @@
                         )
                     )
                     return
+                elif vType == OlivePassNames.AitkPython:
+                    # Check AitkPython specific properties
+                    if k != OlivePassNames.AitkPython:
+                        raise Exception(f"AitkPython pass key must be '{OlivePassNames.AitkPython}' in {configFile}")
+                    if OlivePropertyNames.UserScript in v:
+                        parameter.aitkPython = v[OlivePropertyNames.UserScript]
+                        python_script = Path(configFile).parent / str(parameter.aitkPython)
+                        if not python_script.exists():
+                            raise Exception(f"UserScript file {python_script} does not exist for AitkPython pass in {configFile}")
+                    else:
+                        raise Exception(f"UserScript is required for AitkPython pass in {configFile}")
+                    wa_added = add_optimization_wa(parameter.optimizationPaths, k, v)
+                    if wa_added:
+                        return
+                else:
+                    # TODO handle other optimization types if needed
+                    return
19 changes: 9 additions & 10 deletions .aitk/scripts/sanitize/generator_dml.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from typing import Optional
 
-from .constants import OlivePassNames, OlivePropertyNames, PhaseTypeEnum, ParameterTypeEnum
+from .constants import OlivePassNames, OlivePropertyNames, ParameterTypeEnum, PhaseTypeEnum
 from .generator_common import create_model_parameter, set_optimization_path
 from .model_info import ModelList
 from .model_parameter import ModelParameter, OptimizationPath, Section
@@ -40,15 +40,14 @@ def generate_quantization_config(configFile: Path, parameter: ModelParameter) ->
         phase=PhaseTypeEnum.Quantization,
         parameters=parameters,
         disableToggleGeneration=True,
-        toggle = Parameter(
-            autoGenerated=True,
-            name="Optimize model",
-            type=ParameterTypeEnum.Bool,
-            path=optimize_path,
-            readOnly=True,
-            actions=[[], []],
-        )
-
+        toggle=Parameter(
+            autoGenerated=True,
+            name="Optimize model",
+            type=ParameterTypeEnum.Bool,
+            path=optimize_path,
+            readOnly=True,
+            actions=[[], []],
+        ),
     )
     return None
 
11 changes: 5 additions & 6 deletions .aitk/scripts/sanitize/model_parameter.py
@@ -113,17 +113,14 @@ def Check(
                     printError(f"{_file} section {sectionId} parameter {i} has error")
 
         # TODO move tag check into Parameter
+        # TODO guess for possible tags
         if parameter.path and Section.datasetPathPattern(parameter.path):
             if self.phase == PhaseTypeEnum.Quantization:
                 if not parameter.tags or ParameterTagEnum.QuantizationDataset not in parameter.tags:
                     printError(f"{_file} section {sectionId} parameter {i} should have QuantizationDataset tag")
             elif self.phase == PhaseTypeEnum.Evaluation:
                 if not parameter.tags or ParameterTagEnum.EvaluationDataset not in parameter.tags:
                     printError(f"{_file} section {sectionId} parameter {i} should have EvaluationDataset tag")
-            if parameter.values:
-                missing_keys = [key for key in parameter.values if key not in modelList.HFDatasets]
-                if missing_keys:
-                    printError(f"datasets are not in HFDatasets: {', '.join(str(key) for key in missing_keys)}")
         elif parameter.path and parameter.path.endswith("activation_type"):
             if not parameter.tags or ParameterTagEnum.ActivationType not in parameter.tags:
                 printError(f"{_file} section {sectionId} parameter {i} should have ActivationType tag")
@@ -258,6 +255,7 @@ class ModelParameter(BaseModelClass):
     runtimeInConversion: Optional[Parameter] = None
     optimizationPaths: Optional[List[OptimizationPath]] = None
     optimizationDefault: Optional[str] = None
+    aitkPython: Optional[str] = None
     sections: List[Section] = []
 
     @staticmethod
@@ -379,7 +377,8 @@ def Check(self, templates: Dict[str, Parameter], oliveJson: Any, modelList: Mode
             conversion = [
                 k
                 for k, v in oliveJson[OlivePropertyNames.Passes].items()
-                if v[OlivePropertyNames.Type].lower() == OlivePassNames.OnnxConversion
+                if v[OlivePropertyNames.Type].lower()
+                in [OlivePassNames.OnnxConversion, OlivePassNames.AitkPython]
             ][0]
             conversionPath = f"{OlivePropertyNames.Passes}.{conversion}"
             section.toggle = Parameter(
@@ -601,7 +600,7 @@ def checkPhase(self, oliveJson: Any):
         if (
             PhaseTypeEnum.Evaluation in allPhases
             and PhaseTypeEnum.Quantization in allPhases
-            and len(oliveJson[OlivePropertyNames.DataConfigs]) != 2
+            and (OlivePropertyNames.DataConfigs not in oliveJson or len(oliveJson[OlivePropertyNames.DataConfigs]) != 2)
         ):
             printWarning(f"{self._file}'s olive json should have two data configs for evaluation")
 
27 changes: 17 additions & 10 deletions .aitk/scripts/sanitize/parameters.py
@@ -207,16 +207,23 @@ def Check(
                 if value != self.values[0]:
                     printError(f"Value {value} not the first in values for {self.path}")
                     return False
-                for i in range(len(self.values) - 1):
-                    value_in_list = self.values[i + 1]
-                    if modelList and value_in_list not in modelList.DatasetSplit:
-                        printError(f"Value {value_in_list} not in DatasetSplit for {self.path}")
-                        return False
-                    if modelList and value_in_list not in modelList.DatasetSubset:
-                        # No error for this, just warning
-                        printWarning(
-                            f"Value {value_in_list} not in DatasetSubset for {self.path}. Could be acceptable if it doesn't have subset"
-                        )
+                if modelList:
+                    for i in range(len(self.values)):
+                        value_in_list = self.values[i]
+                        if value_in_list not in modelList.HFDatasets:
+                            printError(f"Value {value_in_list} not in HFDatasets for {self.path}")
+                            return False
+                        if i == 0:
+                            # The first one doesn't need to be in DatasetSplit or DatasetSubset
+                            continue
+                        if value_in_list not in modelList.DatasetSplit:
+                            printError(f"Value {value_in_list} not in DatasetSplit for {self.path}")
+                            return False
+                        if value_in_list not in modelList.DatasetSubset:
+                            # No error for this, just warning
+                            printWarning(
+                                f"Value {value_in_list} not in DatasetSubset for {self.path}. Could be acceptable if it doesn't have subset"
+                            )
             elif value and value not in self.values:
                 printError(f"Value {value} not in values for {self.path}")
                 return False
6 changes: 6 additions & 0 deletions openai-whisper-large-v3-turbo/aitk/.gitignore
@@ -0,0 +1,6 @@
__pycache__
/cache
/history/*/*
!/history/*/history.config
!/history/*/olive_config.json
/data
31 changes: 31 additions & 0 deletions openai-whisper-large-v3-turbo/aitk/README.md
@@ -0,0 +1,31 @@
## Whisper-large-v3-turbo Optimization with ONNX Runtime QNN EP

This folder outlines the process for optimizing the Whisper-large-v3-turbo model using ONNX Runtime with the QNN Execution Provider. It covers exporting FP32 models, generating representative data for static quantization, creating QDQ models, evaluating the models, and performing audio transcription with the optimized models.

### Generate data for static quantization

To get better results, we generate real calibration data from the original FP32 model instead of using random data for static quantization. Here we use 100 samples of the librispeech dataset to generate the required data, which takes around 164 GB of disk space. (A rough sketch of what this step does appears after the commands below.)

First, generate the FP32 ONNX models:

1. Encoder FP32 model

   `olive run --config whisper_large_v3_turbo_encoder_fp32.json`

2. Decoder FP32 model

   `olive run --config whisper_large_v3_turbo_decoder_fp32.json`

Then download and generate data:

1. `python .\qnn_run.py --audio-path .\data\librispeech_asr_clean_test --encoder "models\whisper_encoder_fp32\model\model.onnx" --decoder "models\whisper_decoder_fp32\model.onnx" --model_id "openai/whisper-large-v3-turbo" --save_data .\data\quantization_data --num_data 100`
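
For reference, the feature-extraction core of this step looks roughly like the sketch below. This is a simplified illustration using the Hugging Face `datasets` and `transformers` APIs, not the actual `qnn_run.py` logic (which also runs the FP32 models to capture decoder inputs), and the output layout is hypothetical:

```python
# Simplified sketch: save Whisper log-mel features for 100 librispeech
# samples as .npy files for use as static-quantization calibration data.
from pathlib import Path

import numpy as np
from datasets import load_dataset
from transformers import WhisperProcessor

processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3-turbo")
# Stream the clean test split so we only touch the samples we need
dataset = load_dataset("librispeech_asr", "clean", split="test", streaming=True)

out_dir = Path("data/quantization_data")
out_dir.mkdir(parents=True, exist_ok=True)

for i, sample in enumerate(dataset):
    if i >= 100:
        break
    audio = sample["audio"]
    # large-v3-turbo uses a 128-bin log-mel spectrogram over a padded 30 s window
    features = processor(
        audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="np"
    ).input_features
    np.save(out_dir / f"sample_{i}.npy", features)
```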

### Generate QDQ models

1. `olive run --config whisper_large_v3_turbo_encoder_qdq.json`
2. `olive run --config whisper_large_v3_turbo_decoder_qdq.json`

(Optional) Use `whisper_large_v3_turbo_encoder_qdq_ctx.json` and `whisper_large_v3_turbo_decoder_qdq_ctx.json` to create ONNX models with QNN context binaries embedded in them.

### Transcribe a single sample

`python .\qnn_run.py --audio-path .\data\librispeech_asr_clean_test\1320-122617-0000.npy --encoder "models\whisper_encoder_qdq\model.onnx" --decoder "models\whisper_decoder_qdq\model.onnx" --model_id "openai/whisper-large-v3-turbo" --execution_provider QNNExecutionProvider`
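
Under the hood, transcription runs both models through ONNX Runtime sessions configured for the QNN Execution Provider. A minimal sketch of the encoder side (assuming the standard `backend_path` provider option, a single encoder output, and that the `.npy` sample holds the precomputed mel input; the decoder loop in `qnn_run.py` is more involved):

```python
# Minimal sketch: run the QDQ encoder on one precomputed sample via the QNN EP.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession(
    r"models\whisper_encoder_qdq\model.onnx",
    providers=["QNNExecutionProvider"],
    # QnnHtp.dll targets the Hexagon NPU on Windows on Snapdragon
    provider_options=[{"backend_path": "QnnHtp.dll"}],
)
mel = np.load(r"data\librispeech_asr_clean_test\1320-122617-0000.npy")
(encoder_hidden,) = session.run(None, {session.get_inputs()[0].name: mel})
print(encoder_hidden.shape)
```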
26 changes: 26 additions & 0 deletions openai-whisper-large-v3-turbo/aitk/inference_sample.ipynb
@@ -0,0 +1,26 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eed9c231",
   "metadata": {
    "vscode": {
     "languageId": "plaintext"
    }
   },
   "outputs": [],
   "source": [
    "# TODO\n",
    "ExecutionProvider=\"QNNExecutionProvider\""
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
12 changes: 12 additions & 0 deletions openai-whisper-large-v3-turbo/aitk/info.yml
@@ -0,0 +1,12 @@
keywords:
  aitk
arch: whisper
recipes:
  - file: "qnn_workflow.json"
    device: npu
    ep: QNNExecutionProvider
aitk:
  modelInfo:
    id: "huggingface/openai/whisper-large-v3-turbo"
    version: 1
    status: Hide
12 changes: 12 additions & 0 deletions openai-whisper-large-v3-turbo/aitk/model_project.config
@@ -0,0 +1,12 @@
{
  "workflows": [
    {
      "file": "qnn_workflow.json",
      "templateName": "qnn_workflow"
    }
  ],
  "modelInfo": {
    "id": "huggingface/openai/whisper-large-v3-turbo",
    "version": 1
  }
}