From 13497f0dc3af7ef9c4bfc165663ecf6fac1be09f Mon Sep 17 00:00:00 2001
From: kadirnar <kadir.nar@hotmail.com>
Date: Fri, 3 May 2024 00:59:27 +0300
Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=9E=20Add=20more=20parameter=20support?=
 =?UTF-8?q?=20for=20Whisper=20pipeline?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 requirements.txt                 |  2 ++
 whisperplus/pipelines/whisper.py | 15 +++++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index fafb943..e4379f1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,5 @@ accelerate
 pre-commit==3.4.0
 autollm==0.1.9
 speechbrain==0.5.16
+bitsandbytes
+flash-attn  # needs torch preinstalled; install via: pip install flash-attn --no-build-isolation
diff --git a/whisperplus/pipelines/whisper.py b/whisperplus/pipelines/whisper.py
index 6f5449b..c197704 100644
--- a/whisperplus/pipelines/whisper.py
+++ b/whisperplus/pipelines/whisper.py
@@ -48,7 +48,14 @@ def load_model(self, model_id: str = "openai/whisper-large-v3"):
         self.processor = processor
         self.model = model
 
-    def __call__(self, audio_path: str, language: str = "turkish"):
+    def __call__(
+            self,
+            audio_path: str = "test.mp3",
+            language: str = "turkish",
+            chunk_length_s: int = 30,
+            stride_length_s: int = 5,
+            max_new_tokens: int = 128,
+            batch_size: int = 100):
         """
         Converts audio to text using the pre-trained speech recognition model.
 
@@ -61,9 +68,9 @@ def __call__(self, audio_path: str, language: str = "turkish"):
         pipe = pipeline(
             "automatic-speech-recognition",
             model=self.model,
-            chunk_length_s=30,
-            stride_length_s=5,
-            max_new_tokens=128,
-            batch_size=100,
+            chunk_length_s=chunk_length_s,
+            stride_length_s=stride_length_s,
+            max_new_tokens=max_new_tokens,
+            batch_size=batch_size,
             device_map="auto",
             return_timestamps=True,