Skip to content

Commit

Permalink
πŸ‘¨β€πŸ’» Update parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
kadirnar committed May 3, 2024
1 parent 24a7998 commit 3b41a84
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions whisperplus/pipelines/whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
class SpeechToTextPipeline:
"""Class for converting audio to text using a pre-trained speech recognition model."""

def __init__(self, model_id: str = "openai/whisper-large-v3"):
def __init__(self, model_id: str = "openai/whisper-large-v3", quant_config=None):
self.model = None
self.device = None

Expand All @@ -31,8 +31,7 @@ def load_model(self, model_id: str = "openai/whisper-large-v3", quant_config=Non
low_cpu_mem_usage=True,
use_safetensors=True,
attn_implementation="flash_attention_2",
device_map="auto")

)
logging.info("Model loaded successfully.")

processor = AutoProcessor.from_pretrained(model_id)
Expand All @@ -47,7 +46,9 @@ def __call__(
audio_path: str = "test.mp3",
max_new_tokens: int = 128,
batch_size: int = 100,
language: str = "turkish"):
language: str = "turkish",
return_timestamps: bool = False
):
"""
Converts audio to text using the pre-trained speech recognition model.
Expand All @@ -57,15 +58,15 @@ def __call__(
Returns:
str: Transcribed text from the audio.
"""

pipe = pipeline(
"automatic-speech-recognition",
model=self.model,
chunk_length_s=chunk_length_s,
stride_length_s=stride_length_s,
max_new_tokens=max_new_tokens,
batch_size=100,
device_map="auto",
return_timestamps=True,
batch_size=batch_size,
return_timestamps=return_timestamps,
tokenizer=self.processor.tokenizer,
feature_extractor=self.processor.feature_extractor,
model_kwargs={"use_flash_attention_2": True},
Expand Down

0 comments on commit 3b41a84

Please sign in to comment.