-
Notifications
You must be signed in to change notification settings - Fork 1
/
aud_vid_recog.py
117 lines (78 loc) · 3.69 KB
/
aud_vid_recog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import speech_recognition as sr
from audio_dicer import AudioDicer
import os
import time
import moviepy.editor as mp
import shutil
import logging
##Created by Lex Whalen 2/19/21
class AudioVideoRecognizer():
    """Transcribe speech from audio or video files into a list of words.

    .wav files are read with the ``speech_recognition`` package and sent to
    Google's speech recognition service. Video files first have their audio
    track written to a temporary .wav file, which is diced into short chunks
    and transcribed chunk by chunk.
    """

    def __init__(self):
        """Create the recognizer, the audio dicer and an empty scratch directory."""
        self.RECOG = sr.Recognizer()
        self.DICER = AudioDicer()
        self.CWD = os.getcwd()

        # Scratch directory for extracted and diced audio. It lives under the
        # current working directory and is wiped on every construction so that
        # chunks left over from a previous run are never transcribed.
        self.TEMP_AUD = os.path.join(self.CWD, "temp_aud")
        if os.path.exists(self.TEMP_AUD):
            shutil.rmtree(self.TEMP_AUD)
        os.makedirs(self.TEMP_AUD)

        self.log = logging.getLogger('subtitle_logging')

    @staticmethod
    def _natural_key(name):
        """Return a sort key that orders embedded numbers numerically (2 before 10)."""
        # Local import: the module's import block does not bring in ``re``.
        import re

        # re.split with a capturing group puts the digit runs at odd indices,
        # so str/int positions line up between any two keys being compared.
        return [
            int(token) if index % 2 else token.lower()
            for index, token in enumerate(re.split(r"(\d+)", name))
        ]

    def trash_file(self, file_path):
        """Permanently delete ``file_path`` (it is not moved to a recycle bin)."""
        os.remove(file_path)

    def slice_aud(self, file_path, seconds=30):
        """Dice the audio file at ``file_path`` into chunks of ``seconds`` seconds.

        The splitting itself is delegated to ``AudioDicer.multiple_split``.
        NOTE(review): ``from_file`` expects the chunks to land in the same
        directory as ``file_path`` -- confirm against ``AudioDicer``.

        Args:
            file_path: Path of the audio file to split.
            seconds: Length of each chunk in seconds. Defaults to 30.
        """
        self.log.info("Splicing audio file: %s", file_path)
        self.DICER.multiple_split(file_path, seconds)

    def transcribe(self, file_name, lang):
        """Transcribe one audio file and return the recognized words.

        Args:
            file_name: Path of the audio file to transcribe.
            lang: Language code forwarded to ``recognize_google``.

        Returns:
            A list of words. The list is empty when no speech could be made
            out or when the recognition call fails; failures are logged
            instead of being swallowed silently.
        """
        self.log.info("Transcribing...")
        with sr.WavFile(file_name) as source:
            # Read the whole file into memory as audio data.
            audio = self.RECOG.record(source)

        try:
            return self.RECOG.recognize_google(audio, language=lang).split()
        except sr.UnknownValueError:
            # The service answered but could not make out any speech.
            self.log.info("No intelligible speech in %s", file_name)
            return []
        except Exception:
            # Best effort: one failed chunk must not abort the whole
            # transcription, but the failure is recorded with its traceback.
            self.log.exception("Speech recognition failed for %s", file_name)
            return []

    def from_file(self, f, lang, isVideo=False):
        """Transcribe an audio file, or the audio track of a video file.

        Args:
            f: Path of the input file.
            lang: Language code forwarded to ``recognize_google``.
            isVideo: When true, ``f`` is treated as a video: its audio track is
                extracted into the scratch directory, diced and transcribed
                chunk by chunk.

        Returns:
            The list of recognized words, in playback order for videos. Empty
            when nothing was recognized or the video has no audio track.
        """
        if not isVideo:
            # Already working with an audio file.
            return self.transcribe(f, lang)

        stem = os.path.splitext(os.path.basename(f))[0]
        wav_path = os.path.join(self.TEMP_AUD, "{}.wav".format(stem))

        master_words = []
        try:
            clip = mp.VideoFileClip(f)
            try:
                if clip.audio is None:
                    self.log.warning("Video has no audio track: %s", f)
                    return master_words
                clip.audio.write_audiofile(wav_path)
            finally:
                # Release the file readers held by the clip.
                clip.close()

            # Cut the audio into short pieces so each request stays small,
            # then drop the full-length file so only chunks remain.
            self.slice_aud(wav_path)
            self.trash_file(wav_path)

            # os.listdir returns names in arbitrary order; sort so the words
            # come back in the order the chunks were cut.
            chunk_names = sorted(os.listdir(self.TEMP_AUD), key=self._natural_key)
            for chunk_name in chunk_names:
                chunk_path = os.path.join(self.TEMP_AUD, chunk_name)
                master_words += self.transcribe(chunk_path, lang)
                # Throttle requests to avoid being rate limited.
                time.sleep(2)
        finally:
            # Always clear the scratch directory, even after a failure.
            for leftover in os.listdir(self.TEMP_AUD):
                self.trash_file(os.path.join(self.TEMP_AUD, leftover))

        return master_words