-
Notifications
You must be signed in to change notification settings - Fork 0
/
transcribe.py
executable file
·173 lines (142 loc) · 5.28 KB
/
transcribe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python3
import argparse
import gc # For garbage collection
import os
import sys
import tempfile
import time # For adding a small delay between chunks
import warnings
from pathlib import Path
from urllib.parse import unquote, urlparse

import pyperclip
import requests
import torch
import whisper # Add this import
from pydub import AudioSegment
from pydub.utils import make_chunks
from tqdm import tqdm
from whisper import load_model, transcribe
# Install a process-wide "ignore" filter for every warning category
# (presumably to keep library warnings out of the console output).
warnings.filterwarnings("ignore")
def parse_args():
    """Parse the command line and sanity-check the optional output directory.

    Reads ``sys.argv``. When no arguments are supplied the usage text is
    printed and the process exits with status 1. When ``-o/--output`` is
    given it must name an existing directory; otherwise an error is printed
    and the process exits with status 1.

    Returns:
        The parsed ``argparse.Namespace`` with ``audio_source`` and ``output``.
    """
    parser = argparse.ArgumentParser(description="Transcribe audio files using Whisper")
    parser.add_argument("audio_source", help="Path or URL to the audio file")
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default=None,
        help="Directory to save the transcription file (optional)",
    )

    # Only the program name present: show usage instead of an argparse error.
    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)

    parsed = parser.parse_args()

    # Nothing to validate when no output directory was requested.
    if not parsed.output:
        return parsed

    target_dir = Path(parsed.output)
    problem = None
    if not target_dir.exists():
        problem = f"Error: Output directory '{parsed.output}' does not exist"
    elif not target_dir.is_dir():
        problem = f"Error: '{parsed.output}' is not a directory"

    if problem is not None:
        print(problem)
        sys.exit(1)

    return parsed
def _filename_from_url(url):
    """Return a safe local filename taken from the path component of *url*."""
    # Use only the URL path so query strings and fragments never end up in
    # the filename, and keep just the final path segment.
    name = Path(unquote(urlparse(url).path)).name
    if name in ("", ".", ".."):
        # No usable final segment (e.g. "https://host/").
        return "downloaded_audio"
    return name


def download_audio(url, timeout=30):
    """Stream the audio file at *url* into the current working directory.

    Args:
        url: HTTP(S) address of the audio file.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block the script forever.

    Returns:
        The name of the file that was written (relative to the current
        working directory).

    Raises:
        SystemExit: With status 1 if the request fails or the file cannot
            be written.
    """
    filename = _filename_from_url(url)
    try:
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            # 0 when the server does not announce a Content-Length.
            total_size = int(response.headers.get('content-length', 0))
            with open(filename, 'wb') as f, tqdm(
                desc="Downloading",
                total=total_size,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as pbar:
                for data in response.iter_content(chunk_size=1024):
                    size = f.write(data)
                    pbar.update(size)
        return filename
    except requests.exceptions.RequestException as e:
        print(f"Error downloading file: {e}")
        sys.exit(1)
    except OSError as e:
        # Local failure (unwritable directory, disk full, ...); must come
        # after RequestException, which is itself an OSError subclass.
        print(f"Error saving downloaded file: {e}")
        sys.exit(1)
def validate_audio_file(path):
    """Check that *path* is an existing regular file with a supported extension.

    Supported extensions (case-insensitive): ``.mp3``, ``.wav``, ``.m4a``,
    ``.ogg``.

    Args:
        path: Filesystem path to the candidate audio file.

    Returns:
        The path as a string.

    Raises:
        SystemExit: With status 1 if the path does not exist, is not a
            regular file, or has an unsupported extension.
    """
    file_path = Path(path)
    if not file_path.exists():
        print(f"Error: File '{path}' does not exist")
        sys.exit(1)
    # A directory named e.g. "album.mp3" would otherwise pass the suffix check.
    if not file_path.is_file():
        print(f"Error: '{path}' is not a file")
        sys.exit(1)
    if file_path.suffix.lower() not in ('.mp3', '.wav', '.m4a', '.ogg'):
        print(f"Error: File '{path}' is not a supported audio format (mp3, wav, m4a, or ogg)")
        sys.exit(1)
    return str(file_path)
def transcribe_audio(audio_path, output_dir=None):
    """Transcribe an audio file with Whisper and publish the resulting text.

    The audio is split into 30-second chunks; each chunk is exported to a
    temporary WAV file and transcribed with the ``tiny.en`` model. The
    combined text is copied to the clipboard (best effort), printed to
    stdout and, when *output_dir* is given, written to
    ``<output_dir>/<audio file stem>.txt``.

    Args:
        audio_path: Path of the audio file to transcribe.
        output_dir: Optional directory for the transcript file; nothing is
            written when it is ``None`` or empty.

    Raises:
        SystemExit: With status 1 if transcription or saving fails.
    """
    print("Loading model...")
    model = whisper.load_model("tiny.en")

    try:
        print("Loading audio file...")
        audio = AudioSegment.from_file(audio_path)

        # Split into 30-second chunks
        chunk_length = 30 * 1000  # 30 seconds in milliseconds
        chunks = make_chunks(audio, chunk_length)

        print(f"\nProcessing {len(chunks)} chunks...")

        pieces = []
        # Chunk files live in a private temporary directory so they are
        # removed even when export/transcription raises, and never collide
        # with files in the current working directory.
        with tempfile.TemporaryDirectory(prefix="transcribe_") as temp_dir:
            for i, chunk in enumerate(tqdm(chunks, desc="Transcribing")):
                # Export chunk to temporary file
                temp_path = os.path.join(temp_dir, f"temp_chunk_{i}.wav")
                chunk.export(temp_path, format="wav")

                # Transcribe chunk
                result = model.transcribe(
                    temp_path,
                    fp16=False,
                    language="en",
                    task="transcribe",
                    best_of=1,
                    beam_size=1,
                )
                pieces.append(result["text"] + " ")

                # Remove eagerly so at most one chunk is on disk at a time.
                os.remove(temp_path)

                # Brief pause between chunks.
                time.sleep(0.1)

        transcription = "".join(pieces)

        # Copy to clipboard; failure here is only a warning.
        try:
            pyperclip.copy(transcription)
            print("\nTranscription copied to clipboard!")
        except Exception as e:
            print(f"\nWarning: Could not copy to clipboard: {e}")

        print("\nTranscription:")
        print(transcription)

        # Save to file if output directory specified
        if output_dir:
            base_filename = os.path.splitext(os.path.basename(audio_path))[0]
            output_path = Path(output_dir) / f"{base_filename}.txt"
            try:
                # Explicit UTF-8 so non-ASCII characters in the transcript do
                # not depend on the platform's default encoding.
                with open(output_path, 'w', encoding="utf-8") as f:
                    f.write(transcription)
                print(f"\nTranscription saved to: {output_path}")
            except Exception as e:
                print(f"Error saving transcription file: {e}")
                sys.exit(1)

    except Exception as e:
        print(f"Error during transcription: {e}")
        sys.exit(1)
    finally:
        # Drop the model reference and reclaim memory (including the CUDA
        # cache when a GPU is present) before returning.
        del model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
def main():
    """Run the CLI: resolve the audio source, then transcribe it.

    A source starting with ``http://`` or ``https://`` is downloaded first;
    anything else is validated as a local audio file. Any exception raised
    by the transcription step is reported and the process exits with
    status 1.
    """
    args = parse_args()
    source = args.audio_source

    is_remote = source.startswith(('http://', 'https://'))
    local_path = download_audio(source) if is_remote else validate_audio_file(source)

    try:
        transcribe_audio(local_path, args.output)
    except Exception as err:
        print(f"Error during transcription: {err}")
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()