Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transcribe app improvements #76

Merged
merged 9 commits into from
Mar 24, 2025
Prev Previous commit
Next Next commit
Auto-fetch the language associated with each model
Kostis-S-Z committed Mar 20, 2025
commit d4ce76dd451e828a726dc37b34798933086a842e
43 changes: 25 additions & 18 deletions demo/transcribe_app.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,33 @@
import os
import gradio as gr
import spaces
from huggingface_hub import get_collection
from huggingface_hub import get_collection, HfApi
from transformers import pipeline, Pipeline

# Raw value of the IS_HF_SPACE environment variable (None when it is unset).
is_hf_space = os.getenv("IS_HF_SPACE")

# Choices for the model dropdown, in display order:
#   1. an empty entry,
#   2. every item of the mozilla-ai Common Voice Whisper collection, labelled
#      with the note attached to it in the collection,
#   3. the multilingual OpenAI Whisper checkpoints.
model_ids = [
    "",
    *[
        entry.item_id + f" ({entry.note})"
        for entry in get_collection(
            "mozilla-ai/common-voice-whisper-67b847a74ad7561781aa10fd"
        ).items
    ],
    "openai/whisper-tiny (Multilingual)",
    "openai/whisper-small (Multilingual)",
    "openai/whisper-medium (Multilingual)",
    "openai/whisper-large-v3 (Multilingual)",
    "openai/whisper-large-v3-turbo (Multilingual)",
]


def get_dropdown_model_ids():
    """Build the list of choices shown in the model dropdown.

    The list starts with an empty entry, followed by every item of the
    mozilla-ai Common Voice Whisper collection labelled as
    ``"<item_id> (<language>)"``, followed by the multilingual OpenAI Whisper
    checkpoints.

    The language is the text that follows ``"on "`` in the ``model_name``
    field of the model's card metadata (this presumes names shaped like
    ``"<base model> on <language>"`` -- TODO confirm against the model cards).
    When the card metadata is missing, has no ``model_name``, or the name does
    not contain ``"on "``, the note attached to the collection item is used
    instead, and ``"Unknown"`` as a last resort, so that a single incomplete
    model card does not make the whole lookup fail.

    Returns:
        list[str]: The dropdown entries, in display order.
    """
    # One client is enough for all lookups; no need to rebuild it per model.
    api = HfApi()

    mozilla_ai_model_ids = []
    for model_i in get_collection(
        "mozilla-ai/common-voice-whisper-67b847a74ad7561781aa10fd"
    ).items:
        # card_data is None for repositories without model-card metadata.
        card_data = api.model_info(model_i.item_id).card_data
        model_name = getattr(card_data, "model_name", None)

        name_parts = model_name.split("on ") if isinstance(model_name, str) else []
        if len(name_parts) > 1 and name_parts[1]:
            language = name_parts[1]
        else:
            # Fall back to the note stored on the collection item.
            language = getattr(model_i, "note", None) or "Unknown"

        mozilla_ai_model_ids.append(model_i.item_id + f" ({language})")

    return (
        [""]
        + mozilla_ai_model_ids
        + [
            "openai/whisper-tiny (Multilingual)",
            "openai/whisper-small (Multilingual)",
            "openai/whisper-medium (Multilingual)",
            "openai/whisper-large-v3 (Multilingual)",
            "openai/whisper-large-v3-turbo (Multilingual)",
        ]
    )


def _load_local_model(model_dir: str) -> Pipeline | str:
@@ -132,7 +139,7 @@ def setup_gradio_demo():
"""
)
### Model selection ###

model_ids = get_dropdown_model_ids()
with gr.Row():
with gr.Column():
dropdown_model = gr.Dropdown(