microsoft · xieofxie · Jan 9, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026
@@ -1,16 +1,16 @@
 {
-    "configCheck": 127,
+    "configCheck": 128,
     "copyCheck": 182,
     "extensionCheck": 1,
-    "gitignoreCheck": 37,
+    "gitignoreCheck": 38,
     "inferenceModelCheck": 25,
-    "ipynbCheck": 37,
-    "licenseCheck": 36,
-    "modelProjectCheck": 38,
+    "ipynbCheck": 38,
+    "licenseCheck": 37,
+    "modelProjectCheck": 39,
     "oliveCheck": 36,
-    "oliveJsonCheck": 127,
-    "pathCheck": 992,
+    "oliveJsonCheck": 128,
+    "pathCheck": 1005,
     "requirementsCheck": 37,
     "templateCheck": 1,
-    "venvRequirementsCheck": 10
+    "venvRequirementsCheck": 11
 }
@@ -464,6 +464,19 @@
             "version": 1,
             "p0": false
         },
+        {
+            "displayName": "openai/whisper-large-v3-turbo",
+            "icon": "OpenAI",
+            "modelLink": "https://huggingface.co/openai/whisper-large-v3-turbo",
+            "id": "huggingface/openai/whisper-large-v3-turbo",
+            "runtimes": [
+                "QNN"
+            ],
+            "architecture": "Transformer",
+            "status": "Hide",
+            "relativePath": "openai-whisper-large-v3-turbo/aitk",
+            "version": 1
+        },
         {
             "displayName": "Qwen/Qwen2.5-0.5B",
             "icon": "qwen",
@@ -666,7 +679,8 @@
         "timm/mini-imagenet": "https://huggingface.co/datasets/timm/mini-imagenet",
         "wikipedia": "https://huggingface.co/datasets/wikimedia/wikipedia",
         "google-research-datasets/conceptual_captions": "https://huggingface.co/datasets/google-research-datasets/conceptual_captions",
-        "AIMClab-RUC/COCO-CN": "https://huggingface.co/datasets/AIMClab-RUC/COCO-CN"
+        "AIMClab-RUC/COCO-CN": "https://huggingface.co/datasets/AIMClab-RUC/COCO-CN",
+        "librispeech_asr": "https://huggingface.co/datasets/openslr/librispeech_asr"
     },
     "LoginRequiredDatasets": [
         "imagenet-1k"

@@ -0,0 +1,9 @@
+gdown==5.2.0
+gitpython==3.1.46
+librosa==0.11.0
+qai_hub==0.42.0
+ruamel-yaml==0.19.1
+schema==0.7.8
+sounddevice==0.5.2
+# qai_hub_models must be installed with --no-deps because it depends on onnxruntime
+# uv pip:install qai_hub_models==0.39.1 --no-deps;post
@@ -32,7 +32,6 @@ class ArchitectureEnum(Enum):
 
 class ModelStatusEnum(Enum):
     Ready = "Ready"
-    Coming = "Coming"
     Hide = "Hide"
 
 
@@ -107,6 +106,7 @@ class OliveDeviceTypes(Enum):
 # Pass names are case-insensitive, so all pass names are stored in lower case
 # Keep the entries sorted by value
 class OlivePassNames:
+    AitkPython = "aitkpython"
     ModelBuilder = "modelbuilder"
     NVModelOptQuantization = "nvmodeloptquantization"
     OnnxFloatToFloat16 = "onnxfloattofloat16"
@@ -160,6 +160,7 @@ class OlivePropertyNames:
     TargetDevice = "target_device"
     Type = "type"
     UserConfig = "user_config"
+    UserScript = "user_script"
     WeightFormat = "weight_format"
 
 

@@ -172,8 +172,11 @@ def readCheckIpynb(ipynbFile: str, modelItems: dict[str, ModelParameter]):
                 importStr = importOnnxgenairuntime
             elif modelParameter.runtime.values and modelParameter.isIntel:
                 testPath = outputModelIntelNPURelativePath
+            elif modelParameter.aitkPython:
+                testPath = None
+                importStr = None
             for item in [testPath, importStr]:
-                if not re.search(item, ipynbContent):
+                if item and not re.search(item, ipynbContent):
                     printError(f"{ipynbFile} does not have '{item}' for {name}, please use it as input")
             if modelParameter.evalRuntime:
                 runtime = GlobalVars.RuntimeToEPName[modelParameter.evalRuntime]

@@ -34,6 +34,25 @@ def create_model_parameter(aitk, name: str, configFile: Path):
     return parameter
 
 
+def add_optimization_wa(optimizationPaths: list[OptimizationPath], k: str, v: dict) -> bool:
+    if OlivePropertyNames.Precision in v:
+        optimizationPaths.append(
+            OptimizationPath(
+                name="WeightType",
+                path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.Precision}",
+            )
+        )
+        # We require both weight and activation type for quantization
+        optimizationPaths.append(
+            OptimizationPath(
+                name="ActivationType",
+                path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.ActivationType}",
+            )
+        )
+        return True
+    return False
+
+
 def set_optimization_path(parameter: ModelParameter, configFile: str):
     parameter.optimizationPaths = []
     with open_ex(configFile, "r") as f:
@@ -64,18 +83,7 @@ def set_optimization_path(parameter: ModelParameter, configFile: str):
             OlivePassNames.OnnxStaticQuantization,
             OlivePassNames.OnnxDynamicQuantization,
         ]:
-            parameter.optimizationPaths.append(
-                OptimizationPath(
-                    name="WeightType",
-                    path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.Precision}",
-                )
-            )
-            parameter.optimizationPaths.append(
-                OptimizationPath(
-                    name="ActivationType",
-                    path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.ActivationType}",
-                )
-            )
+            add_optimization_wa(parameter.optimizationPaths, k, v)
             return
         elif vType == OlivePassNames.OnnxFloatToFloat16:
             parameter.optimizationPaths.append(
@@ -85,3 +93,20 @@ def set_optimization_path(parameter: ModelParameter, configFile: str):
                 )
             )
             return
+        elif vType == OlivePassNames.AitkPython:
+            # Check AitkPython specific properties
+            if k != OlivePassNames.AitkPython:
+                raise Exception(f"AitkPython pass key must be '{OlivePassNames.AitkPython}' in {configFile}")
+            if OlivePropertyNames.UserScript in v:
+                parameter.aitkPython = v[OlivePropertyNames.UserScript]
+                python_script = Path(configFile).parent / str(parameter.aitkPython)
+                if not python_script.exists():
+                    raise Exception(f"UserScript file {python_script} does not exist for AitkPython pass in {configFile}")
+            else:
+                raise Exception(f"UserScript is required for AitkPython pass in {configFile}")
+            wa_added = add_optimization_wa(parameter.optimizationPaths, k, v)
+            if wa_added:
+                return
+            else:
+                # TODO handle other optimization types if needed
+                return
@@ -2,7 +2,7 @@
 from pathlib import Path
 from typing import Optional
 
-from .constants import OlivePassNames, OlivePropertyNames, PhaseTypeEnum, ParameterTypeEnum
+from .constants import OlivePassNames, OlivePropertyNames, ParameterTypeEnum, PhaseTypeEnum
 from .generator_common import create_model_parameter, set_optimization_path
 from .model_info import ModelList
 from .model_parameter import ModelParameter, OptimizationPath, Section
@@ -40,15 +40,14 @@ def generate_quantization_config(configFile: Path, parameter: ModelParameter) ->
             phase=PhaseTypeEnum.Quantization,
             parameters=parameters,
             disableToggleGeneration=True,
-            toggle = Parameter(
-                        autoGenerated=True,
-                        name="Optimize model",
-                        type=ParameterTypeEnum.Bool,
-                        path=optimize_path,
-                        readOnly=True,
-                        actions=[[], []],
-                    )
-
+            toggle=Parameter(
+                autoGenerated=True,
+                name="Optimize model",
+                type=ParameterTypeEnum.Bool,
+                path=optimize_path,
+                readOnly=True,
+                actions=[[], []],
+            ),
         )
     return None
 

@@ -113,17 +113,14 @@ def Check(
                 printError(f"{_file} section {sectionId} parameter {i} has error")
 
             # TODO move tag check into Parameter
+            # TODO guess for possible tags
             if parameter.path and Section.datasetPathPattern(parameter.path):
                 if self.phase == PhaseTypeEnum.Quantization:
                     if not parameter.tags or ParameterTagEnum.QuantizationDataset not in parameter.tags:
                         printError(f"{_file} section {sectionId} parameter {i} should have QuantizationDataset tag")
                 elif self.phase == PhaseTypeEnum.Evaluation:
                     if not parameter.tags or ParameterTagEnum.EvaluationDataset not in parameter.tags:
                         printError(f"{_file} section {sectionId} parameter {i} should have EvaluationDataset tag")
-                if parameter.values:
-                    missing_keys = [key for key in parameter.values if key not in modelList.HFDatasets]
-                    if missing_keys:
-                        printError(f"datasets are not in HFDatasets: {', '.join(str(key) for key in missing_keys)}")
             elif parameter.path and parameter.path.endswith("activation_type"):
                 if not parameter.tags or ParameterTagEnum.ActivationType not in parameter.tags:
                     printError(f"{_file} section {sectionId} parameter {i} should have ActivationType tag")
@@ -258,6 +255,7 @@ class ModelParameter(BaseModelClass):
     runtimeInConversion: Optional[Parameter] = None
     optimizationPaths: Optional[List[OptimizationPath]] = None
     optimizationDefault: Optional[str] = None
+    aitkPython: Optional[str] = None
     sections: List[Section] = []
 
     @staticmethod
@@ -379,7 +377,8 @@ def Check(self, templates: Dict[str, Parameter], oliveJson: Any, modelList: Mode
                         conversion = [
                             k
                             for k, v in oliveJson[OlivePropertyNames.Passes].items()
-                            if v[OlivePropertyNames.Type].lower() == OlivePassNames.OnnxConversion
+                            if v[OlivePropertyNames.Type].lower()
+                            in [OlivePassNames.OnnxConversion, OlivePassNames.AitkPython]
                         ][0]
                     conversionPath = f"{OlivePropertyNames.Passes}.{conversion}"
                     section.toggle = Parameter(
@@ -601,7 +600,7 @@ def checkPhase(self, oliveJson: Any):
         if (
             PhaseTypeEnum.Evaluation in allPhases
             and PhaseTypeEnum.Quantization in allPhases
-            and len(oliveJson[OlivePropertyNames.DataConfigs]) != 2
+            and (OlivePropertyNames.DataConfigs not in oliveJson or len(oliveJson[OlivePropertyNames.DataConfigs]) != 2)
         ):
             printWarning(f"{self._file}'s olive json should have two data configs for evaluation")
 

@@ -207,16 +207,23 @@ def Check(
                         if value != self.values[0]:
                             printError(f"Value {value} not the first in values for {self.path}")
                             return False
-                        for i in range(len(self.values) - 1):
-                            value_in_list = self.values[i + 1]
-                            if modelList and value_in_list not in modelList.DatasetSplit:
-                                printError(f"Value {value_in_list} not in DatasetSplit for {self.path}")
-                                return False
-                            if modelList and value_in_list not in modelList.DatasetSubset:
-                                # No error for this, just warning
-                                printWarning(
-                                    f"Value {value_in_list} not in DatasetSubset for {self.path}. Could be acceptable if it doesn't have subset"
-                                )
+                        if modelList:
+                            for i in range(len(self.values)):
+                                value_in_list = self.values[i]
+                                if value_in_list not in modelList.HFDatasets:
+                                    printError(f"Value {value_in_list} not in HFDatasets for {self.path}")
+                                    return False
+                                if i == 0:
+                                    # The first one doesn't need to be in DatasetSplit or DatasetSubset
+                                    continue
+                                if value_in_list not in modelList.DatasetSplit:
+                                    printError(f"Value {value_in_list} not in DatasetSplit for {self.path}")
+                                    return False
+                                if value_in_list not in modelList.DatasetSubset:
+                                    # No error for this, just warning
+                                    printWarning(
+                                        f"Value {value_in_list} not in DatasetSubset for {self.path}. Could be acceptable if it doesn't have subset"
+                                    )
                     elif value and value not in self.values:
                         printError(f"Value {value} not in values for {self.path}")
                         return False

@@ -0,0 +1,6 @@
+__pycache__
+/cache
+/history/*/*
+!/history/*/history.config
+!/history/*/olive_config.json
+/data
@@ -0,0 +1,31 @@
+## Whisper-large-v3-turbo Optimization with ONNX Runtime QNN EP
+
+This folder outlines the process for optimizing the Whisper-large-v3-turbo model using ONNX Runtime with the QNN Execution Provider. It includes steps for exporting FP32 models, generating representative data for static quantization, creating QDQ models, evaluating the models, and performing audio transcription with the optimized models.
+
+### Generate data for static quantization
+
+To get better results, we need to generate real data from the original FP32 model instead of using random data for static quantization. Here we use 100 samples from the LibriSpeech dataset to generate the required data, which takes around 164 GB of disk space.
+
+First, generate the FP32 ONNX models:
+
+1. Encoder FP32 model
+
+    `olive run --config whisper_large_v3_turbo_encoder_fp32.json`
+1. Decoder FP32 model
+
+    `olive run --config whisper_large_v3_turbo_decoder_fp32.json`
+
+Then download and generate data:
+
+1. `python .\qnn_run.py --audio-path .\data\librispeech_asr_clean_test --encoder "models\whisper_encoder_fp32\model\model.onnx" --decoder "models\whisper_decoder_fp32\model.onnx" --model_id "openai/whisper-large-v3-turbo" --save_data .\data\quantization_data --num_data 100`
+
+### Generate QDQ models
+
+1. `olive run --config whisper_large_v3_turbo_encoder_qdq.json`
+2. `olive run --config whisper_large_v3_turbo_decoder_qdq.json`
+
+(Optional) Use `whisper_large_v3_turbo_encoder_qdq_ctx.json` and `whisper_large_v3_turbo_decoder_qdq_ctx.json` to create ONNX models with QNN context binaries embedded in them.
+
+### Transcribe a single sample
+
+`python .\qnn_run.py --audio-path .\data\librispeech_asr_clean_test\1320-122617-0000.npy --encoder "models\whisper_encoder_qdq\model.onnx" --decoder "models\whisper_decoder_qdq\model.onnx" --model_id "openai/whisper-large-v3-turbo" --execution_provider QNNExecutionProvider`
@@ -0,0 +1,26 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eed9c231",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# TODO\n",
+    "ExecutionProvider=\"QNNExecutionProvider\""
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -0,0 +1,12 @@
+keywords:
+  aitk
+arch: whisper
+recipes:
+  - file: "qnn_workflow.json"
+    device: npu
+    ep: QNNExecutionProvider
+aitk:
+  modelInfo:
+    id: "huggingface/openai/whisper-large-v3-turbo"
+    version: 1
+    status: Hide
@@ -0,0 +1,12 @@
+{
+    "workflows": [
+        {
+            "file": "qnn_workflow.json",
+            "templateName": "qnn_workflow"
+        }
+    ],
+    "modelInfo": {
+        "id": "huggingface/openai/whisper-large-v3-turbo",
+        "version": 1
+    }
+}