-
Notifications
You must be signed in to change notification settings - Fork 8
/
transcribe_folder.py
executable file
·115 lines (94 loc) · 4.42 KB
/
transcribe_folder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python2
import sys
import os
import glob
import subprocess
import io
# Absolute path of the JSON credentials file, looked up in the same directory
# as this script.  The entry-point block exports it through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable before processing.
# NOTE(review): presumably a Google service-account key -- confirm it is not
# committed to version control.
GOOGLE_API_CREDENTIALS = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sage-collector-250907-e0be5d8f9867.json')
def transcribe_file(wave_file, dest_num, lang='en-US'):
    """Transcribe one audio file and write the result next to it.

    The audio is sent to Google Cloud Speech with a synchronous
    ``recognize`` request and the outcome is written, UTF-8 encoded, to
    ``<wave_file>.txt`` (a separate temporary transcript file per call).
    NOTE(review): the service limits synchronous requests to short audio;
    confirm the recordings fit within that limit.

    Args:
        wave_file: Path of the audio file; it is submitted as LINEAR16 audio.
        dest_num: Destination number written in the transcript header.
        lang: Language code passed to the recognizer.

    Returns:
        None.  Transcription failures are reported on stdout rather than
        raised, so one bad file does not abort a batch run.
    """
    if not wave_file:
        # Reject a bad call before importing the client library.
        print('No wave file provided for transcription')
        return

    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    try:
        client = speech.SpeechClient()
        with io.open(wave_file, 'rb') as audio_file:
            content = audio_file.read()
        audio = types.RecognitionAudio(content=content)
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            language_code=lang)
        # synchronous transcription
        response = client.recognize(config, audio)

        # Work with text throughout so non-ASCII transcripts can be written
        # under both Python 2 and Python 3.
        if isinstance(dest_num, bytes):
            dest_num = dest_num.decode('utf-8', 'replace')

        lines = [u'Destination Number: {}\n'.format(dest_num)]
        # Each result covers a consecutive portion of the audio, so all of
        # them are needed for the complete transcript.
        for result in response.results:
            for alternative in result.alternatives:
                lines.append(u'Transcript: {}\n'.format(alternative.transcript))
                lines.append(u'Confidence: {}\n'.format(alternative.confidence))
        if len(lines) == 1:
            lines.append(u'No transcript available\n\n')
        else:
            lines.append(u'\n')

        # The file is only created once the whole text is ready, so a failure
        # above never leaves a partial transcript behind.
        transcript_filename = wave_file + '.txt'
        with io.open(transcript_filename, 'w', encoding='utf-8') as transcript_file:
            transcript_file.writelines(lines)
    except Exception as e:
        # Best effort: report which file failed and why, then carry on.
        print('Cannot transcribe file %s: %r' % (wave_file, e))
def print_progress(current, total):
    """Write ``current`` out of ``total`` to stdout as a whole percentage.

    The text starts with a carriage return and has no trailing newline, so
    successive calls redraw the same terminal line.  The fractional part of
    the percentage is dropped.
    """
    done = float(current) * 100 / total
    out = sys.stdout
    out.write("\r%d%%" % done)
    out.flush()
def process_folder(path):
    """Convert, transcribe and merge every recording found in ``path``.

    Steps:
      1. Each ``*.amr`` file is converted to a ``.wav`` file with ``sox``.
      2. Each ``*.wav`` file is passed to ``transcribe_file``; the file name
         without its extension is used as the destination number.
      3. The per-file transcripts are merged into ``transcripts.txt`` (an
         existing one is first renamed to ``transcripts.txt.old``); each
         merged temporary transcript and its wav file are then deleted.
         A wav file without a transcript is kept so it can be retried.

    Args:
        path: Folder containing the amr and/or wav files.
    """
    print('Convert all found amr files to wav')
    for amr_filename in sorted(glob.glob(os.path.join(path, '*.amr'))):
        wav_filename = os.path.splitext(amr_filename)[0] + '.wav'
        try:
            status = subprocess.call(['sox', amr_filename, wav_filename])
        except OSError as error:
            # sox itself could not be started (e.g. it is not installed), so
            # the remaining conversions would fail the same way.
            print('Cannot run sox: %r' % (error,))
            break
        if status != 0:
            print('sox could not convert %s (exit status %d)' % (amr_filename, status))

    print('Transcribe existing wav files')
    # List the folder once, in a stable order, and reuse that list for the
    # count, the transcription pass and the merge pass.
    wave_filenames = sorted(glob.glob(os.path.join(path, '*.wav')))
    total = len(wave_filenames)
    if total:
        print_progress(0, total)
    for done, wave_filename in enumerate(wave_filenames, 1):
        dest_num = os.path.splitext(os.path.basename(wave_filename))[0]
        transcribe_file(wave_filename, dest_num)
        # Report after each file so the last update shows 100%.
        print_progress(done, total)

    print('\nMerge all transcriptions into a single text file and delete wav files')
    transcript_filename = os.path.join(path, 'transcripts.txt')
    if os.path.isfile(transcript_filename):
        # rename existing transcripts file
        os.rename(transcript_filename, transcript_filename + '.old')
    # Copy bytes verbatim so the merge does not depend on any text encoding.
    with open(transcript_filename, 'wb') as transcript_file:
        for wave_filename in wave_filenames:
            temp_transcript_filename = wave_filename + '.txt'
            if not os.path.isfile(temp_transcript_filename):
                # No transcript was produced: keep the audio for a later run.
                continue
            with open(temp_transcript_filename, 'rb') as temp_transcript_file:
                transcript_file.write(temp_transcript_file.read())
            os.remove(temp_transcript_filename)
            os.remove(wave_filename)
if __name__ == '__main__':
    # Script entry point: expects the folder holding the audio files as the
    # first command-line argument.
    # Every message is built into a single string before printing: under the
    # python2 interpreter named in the shebang, print('a', b) would display a
    # tuple instead of the intended text.
    if 1 < len(sys.argv):
        path = sys.argv[1]
        if os.path.isdir(path):
            # set environment variable for Google Cloud Speech API application
            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_API_CREDENTIALS
            # process folder
            process_folder(path)
        else:
            print('The provided path {} is not a folder'.format(path))
    else:
        print('This script should be called with the path of the audio files to translate as argument')
        print('The provided folder must contain either amr or wav files')
        print('The output is a text file, transcripts.txt, with all transcripts')
        print('{}  <audio files path>'.format(sys.argv[0]))