forked from adamcohenhillel/ADeus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
whisper_service.py
76 lines (62 loc) · 2.8 KB
/
whisper_service.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from flask import Flask, request, jsonify
import whisper
from werkzeug.utils import secure_filename
import os
from datetime import datetime
from pydub import AudioSegment
from transcribeHallu import loadModel
from transcribeHallu import transcribePrompt
app = Flask(__name__)
# It's a good practice to limit the maximum upload size, e.g., 16MB
# app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
# Define the path for saving uploaded files (temporary)
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
if not os.path.exists(UPLOAD_FOLDER):
os.makedirs(UPLOAD_FOLDER)
def boost_volume(file_path, decibels=10):
audio_segment = AudioSegment.from_file(file_path)
louder_segment = audio_segment + decibels # Increase volume by `decibels` dB
louder_segment.export(file_path, format="wav")
def transcribe_audio(file_path):
model = whisper.load_model("base")
result = model.transcribe(file_path, language="en", compression_ratio_threshold=1.35, temperature=0)
return result['text']
@app.route('/transcribe', methods=['POST'])
def transcribe():
if 'file' not in request.files:
return jsonify(error="No file part"), 400
file = request.files['file']
if file.filename == '':
return jsonify(error="No selected file"), 400
if file:
filename = secure_filename(file.filename)
# Add timestamp to filename to avoid overwrites
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
filename_with_timestamp = f"{timestamp}_{filename}"
save_path = os.path.join(app.config['UPLOAD_FOLDER'], filename_with_timestamp)
file.save(save_path)
# Boost the volume before transcribing
boost_volume(save_path, decibels=50) # Adjust the decibels as necessary
##### The audio language may be different from the one for the output transcription.
path = save_path
lngInput = "en"
# ##### Activate this for music file to get a minimal processing
isMusic = False
# ##### Need to be adapted for each language.
# ##### For prompt examples, see transcribeHallu.py getPrompt(lng:str)
lng = "en"
prompt = "Whisper, Ok. "\
+"A pertinent sentence for your purpose in your language. "\
+"Ok, Whisper. Whisper, Ok. "\
+"Ok, Whisper. Whisper, Ok. "\
+"Please find here, an unlikely ordinary sentence. "\
+"This is to avoid a repetition to be deleted. "\
+"Ok, Whisper. "
# ##### Model size to use
modelSize = "medium"
loadModel("0",modelSize=modelSize)
result = transcribePrompt(path=path, lng=lng, prompt=prompt, lngInput=lngInput,isMusic=isMusic)
# Optionally, remove the file after processing
os.remove(save_path)
return jsonify(transcription=result)