From 57899a3cfb3e393b6c8249156ec825cc8572fb2f Mon Sep 17 00:00:00 2001 From: xqyz <10251866+bphd@users.noreply.github.com> Date: Wed, 18 Dec 2024 09:33:22 +0000 Subject: [PATCH 1/2] tokenizer.py: State when the language is not detected from the audio --- whisper/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper/tokenizer.py b/whisper/tokenizer.py index 2af837570..501b7a945 100644 --- a/whisper/tokenizer.py +++ b/whisper/tokenizer.py @@ -381,7 +381,7 @@ def get_tokenizer( if multilingual: encoding_name = "multilingual" - language = language or "en" + language = language or "default" task = task or "transcribe" else: encoding_name = "gpt2" From 66f30a3d3c41a0ffe9f84554b415c22a45b807a7 Mon Sep 17 00:00:00 2001 From: xqyz <10251866+bphd@users.noreply.github.com> Date: Wed, 18 Dec 2024 09:37:09 +0000 Subject: [PATCH 2/2] decoding.py: State when the language is not detected from the audio --- whisper/decoding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper/decoding.py b/whisper/decoding.py index 49485d009..5aa7f9df2 100644 --- a/whisper/decoding.py +++ b/whisper/decoding.py @@ -514,7 +514,7 @@ class DecodingTask: def __init__(self, model: "Whisper", options: DecodingOptions): self.model = model - language = options.language or "en" + language = options.language or "default" tokenizer = get_tokenizer( model.is_multilingual, num_languages=model.num_languages,