feat(python): adding test scripts

nekomeowww · nekomeowww · commit 97490a55b40c · 2025-07-09T14:20:29.000+08:00
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,2 @@
+# SCM syntax highlighting & preventing 3-way merges
+pixi.lock merge=binary linguist-language=YAML linguist-generated=true
diff --git a/.gitignore b/.gitignore
@@ -70,3 +70,7 @@ trace-*.json
 # Generated by Tauri
 # will have schema files for capabilities auto-completion
 /gen/schemas
+
+# pixi environments
+.pixi
+*.egg-info
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "python/dependencies/librosa"]
+	path = python/dependencies/librosa
+	url = https://github.com/librosa/librosa
diff --git a/cspell.config.yaml b/cspell.config.yaml
@@ -6,7 +6,9 @@ words:
   - AIRI
   - byteorder
   - clippy
+  - coreml
   - cuda
+  - directml
   - distil
   - dtolnay
   - DTYPE
@@ -16,6 +18,7 @@ words:
   - logprob
   - melfilters
   - mmaped
+  - ndarray
   - onnx
   - probs
   - Resampler
diff --git a/hack/hftf b/hack/hftf
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+import sys
+import os
+from huggingface_hub import hf_hub_download
+from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError, LocalEntryNotFoundError
+
+def get_hf_file_path(model_id, file_path):
+    """
+    Look up a Hugging Face Hub file in the local cache and return the real
+    on-disk location behind the cached entry (symbolic links are resolved).
+
+    Nothing is downloaded: the lookup is restricted to files already cached.
+    Failures are reported through the return value instead of being raised.
+
+    Args:
+        model_id (str): Repository ID, e.g. "onnx-community/whisper-large-v3-turbo".
+        file_path (str): Path of the file inside the repository, e.g. "onnx/encoder_model.onnx".
+
+    Returns:
+        str: The resolved path on success, otherwise a human-readable error message.
+    """
+    try:
+        # Cache-only lookup; local_files_only=True prevents any download.
+        cached_entry = hf_hub_download(repo_id=model_id, filename=file_path, local_files_only=True)
+        # The cached entry may be a symlink; follow it to the underlying file.
+        return os.path.realpath(cached_entry)
+    except LocalEntryNotFoundError:
+        message = f"Error: File '{file_path}' not found in local cache for repo '{model_id}'. Try downloading it first."
+    except RepositoryNotFoundError:
+        message = f"Error: Model repository '{model_id}' not found on the Hugging Face Hub."
+    except EntryNotFoundError:
+        # Unlikely in cache-only mode, kept as a safety net.
+        message = f"Error: File '{file_path}' not found in the repository '{model_id}'."
+    except Exception as err:
+        message = f"An unexpected error occurred: {err}"
+    return message
+
+if __name__ == "__main__":
+    # Command-line entry point: hftf <model_id> <file_path>.
+    # Prints the resolved cache path (or an error message) to standard output
+    # and exits with status 1 on any failure.
+
+    # Check for the correct number of command-line arguments.
+    if len(sys.argv) != 3:
+        print("Usage: hftf <model_id> <file_path>")
+        print("\nExample:")
+        print("  hftf onnx-community/whisper-large-v3-turbo onnx/encoder_model.onnx")
+        sys.exit(1)
+
+    repo_id = sys.argv[1]
+    filename = sys.argv[2]
+
+    # Get the path and print it to standard output.
+    final_path = get_hf_file_path(repo_id, filename)
+    print(final_path)
+
+    # get_hf_file_path reports failures as strings instead of raising. Most of
+    # them start with "Error:", but its catch-all handler returns a message
+    # starting with "An unexpected error occurred:". Both must produce a
+    # non-zero exit status so callers can detect the failure.
+    if final_path.startswith(("Error:", "An unexpected error occurred:")):
+        sys.exit(1)
diff --git a/pixi.lock b/pixi.lock
diff --git a/pixi.toml b/pixi.toml
@@ -0,0 +1,22 @@
+[workspace]
+authors = ["Neko Ayaka <neko@ayaka.moe>"]
+channels = ["conda-forge"]
+name = "candle-examples"
+platforms = ["osx-arm64", "linux-64", "win-64"]
+version = "0.1.0"
+
+[dependencies]
+python = "3.12.*"
+pip = ">=25.1.1,<26"
+
+[pypi-dependencies]
+setuptools = ">=80.9.0, <81"
+numpy = "==2.2"
+huggingface-hub = ">=0.33.2, <0.34"
+transformers = ">=4.53.1, <5"
+onnxruntime = ">=1.22.0, <2"
+torch = ">=2.7.1, <3"
+torchaudio = ">=2.7.1, <3"
+torchvision = ">=0.22.1, <0.23"
+matplotlib = ">=3.10.3, <4"
+librosa = { git = "https://github.com/librosa/librosa" }
diff --git a/python/test/whisper-tensor-visualize.py b/python/test/whisper-tensor-visualize.py
@@ -0,0 +1,133 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from pathlib import Path
+from transformers import WhisperProcessor
+from os import path
+
+# --- Configuration ---
+# This must match the model used to generate the data.
+# The last path component of this ID is the filename prefix of the .npy files
+# loaded in main(), and the full ID is used to load the tokenizer.
+MODEL_ID = "onnx-community/lite-whisper-large-v3-ONNX"
+# MODEL_ID = "onnx-community/whisper-large-v3-turbo"
+
+# Directory where the .npy files are stored
+# (a relative path, so it is resolved against the current working directory)
+INPUT_DIR = Path("verification_data")
+
+# Number of top tokens to show in the logits plot
+TOP_K_LOGITS = 20
+
+def plot_mel_spectrogram(features, output_path):
+    """Render the mel spectrogram as a heatmap, save it, and return the figure.
+
+    A 3-D input whose first axis has length 1 has that axis dropped before
+    plotting; any other input is plotted as given.
+    """
+    has_unit_batch = features.ndim == 3 and features.shape[0] == 1
+    spectrogram = features.squeeze(0) if has_unit_batch else features
+
+    fig, ax = plt.subplots(figsize=(12, 6))
+    image = ax.imshow(
+        spectrogram,
+        aspect='auto',
+        origin='lower',
+        cmap='viridis',
+        interpolation='none',
+    )
+    fig.colorbar(image, ax=ax, format='%+2.0f dB')
+    ax.set(title="Input Mel Spectrogram", xlabel="Time Steps", ylabel="Mel Bins")
+
+    plt.tight_layout()
+    plt.savefig(output_path)
+    print(f"Saved spectrogram plot to {output_path}")
+    return fig
+
+def plot_encoder_output(hidden_states, output_path):
+    """Draw the encoder hidden states as a heatmap, write it to disk, and return the figure.
+
+    When the array is 3-D with a leading axis of length 1, that axis is
+    removed first; otherwise the array is plotted unchanged.
+    """
+    states = hidden_states
+    if states.ndim == 3 and states.shape[0] == 1:
+        # Drop the singleton leading axis so imshow receives a 2-D array.
+        states = states.squeeze(0)
+
+    fig, ax = plt.subplots(figsize=(12, 6))
+    heatmap = ax.imshow(states, aspect='auto', origin='lower', cmap='viridis', interpolation='none')
+    fig.colorbar(heatmap, ax=ax)
+    ax.set(title="Encoder Hidden States", xlabel="Sequence Length", ylabel="Hidden Dimension")
+
+    plt.tight_layout()
+    plt.savefig(output_path)
+    print(f"Saved encoder output plot to {output_path}")
+    return fig
+
+def plot_logits(logits, tokenizer, output_path):
+    """Generates and saves a horizontal bar chart of the top K logits.
+
+    Bars are ordered from the highest logit (top of the chart) to the lowest,
+    and the highest-scoring token is drawn in a different color.
+
+    Args:
+        logits: Array indexed as [batch, position, vocab]. Only batch 0 and
+            the last position are plotted.
+        tokenizer: Object whose ``decode`` method turns a list of token IDs
+            into a string.
+        output_path: Destination passed to ``plt.savefig``.
+
+    Returns:
+        The matplotlib figure containing the chart.
+    """
+    # We want the logits for the *next* token, i.e. those at the last position.
+    last_token_logits = logits[0, -1, :]
+
+    # Take the K largest logits and order them highest-first. Combined with the
+    # inverted y-axis below, this places the best token at the top of the chart.
+    top_k_indices = np.argsort(last_token_logits)[-TOP_K_LOGITS:][::-1]
+    top_k_values = last_token_logits[top_k_indices]
+
+    # The vocabulary may hold fewer than TOP_K_LOGITS entries, so size the
+    # chart from what was actually selected.
+    num_bars = len(top_k_indices)
+    positions = np.arange(num_bars)
+
+    # Decode the token IDs to human-readable strings
+    top_k_tokens = [tokenizer.decode([int(idx)]) for idx in top_k_indices]
+
+    # Find the token that was actually chosen (the one with the highest logit)
+    chosen_token_index = int(np.argmax(top_k_values))
+
+    fig, ax = plt.subplots(figsize=(10, 8))
+    bars = ax.barh(positions, top_k_values, color='skyblue')
+
+    # Highlight the chosen token in a different color
+    bars[chosen_token_index].set_color('salmon')
+
+    ax.set_yticks(positions)
+    ax.set_yticklabels(top_k_tokens)
+    ax.invert_yaxis() # Position 0 (the highest value) is displayed at the top
+    ax.set_xlabel("Logit Value")
+    ax.set_title(f"Top {num_bars} Predicted Tokens (First Decoder Step)")
+
+    # Add the logit values as text on the bars
+    for bar in bars:
+        width = bar.get_width()
+        # Positive bars get the label at their right end. Negative bars extend
+        # left of zero, so anchor the label at the baseline instead.
+        label_x_pos = width if width > 0 else 0
+        ax.text(label_x_pos, bar.get_y() + bar.get_height()/2, f' {width:.2f}',
+                va='center', ha='left')
+
+    plt.tight_layout()
+    plt.savefig(output_path)
+    print(f"Saved logits plot to {output_path}")
+    return fig
+
+
+def main():
+    """Read the saved arrays, render the three plots to disk, then display them.
+
+    Returns early, after printing a message, when the input directory or any
+    of the expected .npy files is missing.
+    """
+    # Nothing to do without the data directory.
+    if not INPUT_DIR.is_dir():
+        print(f"Error: Directory '{INPUT_DIR}' not found. Please run the data generation script first.")
+        return
+
+    # --- Load Data ---
+    # Every file name is the last component of the model ID plus a fixed suffix.
+    file_prefix = path.basename(MODEL_ID)
+    suffixes = ("_input_features.npy", "_encoder_output.npy", "_step_0_logits.npy")
+    try:
+        input_features, encoder_output, step_0_logits = (
+            np.load(INPUT_DIR / f"{file_prefix}{suffix}") for suffix in suffixes
+        )
+    except FileNotFoundError as err:
+        print(f"Error: Missing data file - {err}. Please ensure all .npy files exist in '{INPUT_DIR}'.")
+        return
+
+    print("Successfully loaded all .npy files.")
+
+    # --- Load Tokenizer ---
+    # Needed to turn logit indices back into text for the bar chart labels.
+    print(f"Loading tokenizer for {MODEL_ID}...")
+    tokenizer = WhisperProcessor.from_pretrained(MODEL_ID).tokenizer
+    print("Tokenizer loaded.")
+
+    # --- Generate Plots ---
+    # All images are written into a "plots" directory, created on demand.
+    output_dir = Path("plots")
+    output_dir.mkdir(exist_ok=True)
+
+    plot_mel_spectrogram(input_features, output_dir / "mel_spectrogram.png")
+    plot_encoder_output(encoder_output, output_dir / "encoder_output.png")
+    plot_logits(step_0_logits, tokenizer, output_dir / "step_0_logits.png")
+
+    # --- Show Plots ---
+    # Opens an interactive window per figure.
+    print("\nDisplaying plots. Close the plot windows to exit the script.")
+    plt.show()
+
+
+if __name__ == "__main__":
+    # Script entry point: only runs when the file is executed directly.
+    # You will need matplotlib: pip install matplotlib
+    # (numpy and transformers are also imported at the top of this script.)
+    main()
diff --git a/python/test/whisper-test.py b/python/test/whisper-test.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# SCM syntax highlighting & preventing 3-way merges`
	`2`	`+pixi.lock merge=binary linguist-language=YAML linguist-generated=true`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[submodule "python/dependencies/librosa"]`
	`2`	`+ path = python/dependencies/librosa`
	`3`	`+ url = https://github.com/librosa/librosa`