-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path0_main_vtt_generation.py
117 lines (94 loc) · 3.83 KB
/
0_main_vtt_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python3
import yaml # pip install PyYAML
import os
import json
import time
import subprocess
import utils
import datetime
import shutil
from webvtt import WebVTT, Caption # pip install webvtt-py
from vosk import Model, KaldiRecognizer, SetLogLevel # pip install vosk
# authentication information
# The credentials live in config/auth.yaml next to this script and must
# provide the keys "client_id" and "client_secret".
path_base = os.path.dirname(os.path.abspath(__file__))
auth_config = path_base + "/config/auth.yaml"
# safe_load only constructs plain YAML types (dicts, lists, strings, numbers),
# which is all a credentials file needs; the more permissive yaml.load /
# FullLoader is avoided on purpose. The encoding is pinned so that reading
# the file does not depend on the platform's default locale.
with open(auth_config, encoding="utf-8") as f:
    auth = yaml.safe_load(f)
client_id = auth["client_id"]
client_secret = auth["client_secret"]
# ================================================================
# ================================================================
# Locations of the bundled command-line tools and of the data tree,
# all expressed relative to the directory containing this script.
path_twitch_ffmpeg = f"{path_base}/thirdparty/ffmpeg-4.3.1-amd64-static/ffmpeg"  # ffmpeg binary
path_root = f"{path_base}/../data/"  # root folder that is searched for videos
path_model = f"{path_base}/thirdparty/vosk-model-small-en-us-0.15/"  # vosk model directory
# ================================================================
# ================================================================
# install the control+c handler before any long-running work starts
utils.setup_signal_handle()
channel = "sodapoppin"

# Walk the channel's data folder and collect, in matching order:
#   files_names - full path of every video to consider
#   files_out   - full path of the .vtt transcript belonging to that video
# A file qualifies when its name is exactly "<stem>.mp4" (a single extension
# separator) and contains no underscore.
files_names = []
files_out = []
for folder, _subfolders, entries in os.walk(path_root + "/" + channel + "/"):
    for entry in entries:
        # stop scanning as soon as termination has been requested
        if utils.terminated_requested:
            break
        pieces = entry.split(os.extsep)
        if len(pieces) != 2:
            continue
        stem, suffix = pieces
        if suffix != "mp4" or "_" in entry:
            continue
        transcript_path = os.path.join(folder, stem + ".vtt")
        files_out.append(transcript_path)
        files_names.append(os.path.join(folder, entry))
        print(transcript_path)
    # the inner break only leaves the per-folder loop; leave the walk as well
    if utils.terminated_requested:
        break
print(f"found {len(files_out)} videos found to process")
# Loop through each discovered video and transcribe its audio track into a
# WebVTT file holding one caption per recognized word.
#
# For every (video, transcript) pair:
#   * stop if termination was requested
#     (NOTE(review): utils.terminated_requested is presumably set by the
#     handler installed through utils.setup_signal_handle() - confirm)
#   * skip videos modified less than 60 seconds ago
#   * skip videos whose transcript already exists
#   * decode the audio with ffmpeg, feed it to the vosk recognizer and save
#     the recognized words as captions
sample_rate = 16000  # rate passed both to ffmpeg ('-ar') and to the recognizer
model = None  # vosk model; loaded on first use and then reused for every video
for video_path, file_path_webvtt in zip(files_names, files_out):

    # check if we should process any more
    if utils.terminated_requested:
        print('terminate requested, not transcribing any more..')
        break

    # check if old enough to process; the video may have been removed since
    # the directory walk, in which case there is nothing left to transcribe
    try:
        oldness = time.time() - os.path.getmtime(video_path)
    except OSError:
        print("skipping " + video_path + " since it no longer exists")
        continue
    if oldness < 60:
        print("skipping " + video_path + " since it is only " + str(oldness) + " sec old")
        continue

    # AUDIO-TO-TEXT: nothing to do when the transcript was already generated
    if os.path.exists(file_path_webvtt):
        continue
    print("transcribing: " + file_path_webvtt)
    t0 = time.time()

    # open the model only once (loading it is loop-invariant work), but
    # create a fresh recognizer for each video
    if model is None:
        SetLogLevel(-1)
        model = Model(path_model)
    rec = KaldiRecognizer(model, sample_rate)
    rec.SetWords(True)

    # open ffmpeg pipe stream of the audio file (from video); the decoded
    # audio is written to ffmpeg's stdout as raw 's16le' samples
    command = [path_twitch_ffmpeg, '-nostdin', '-loglevel', 'quiet', '-i', video_path,
               '-ar', str(sample_rate), '-ac', '1', '-f', 's16le', '-']
    results = []
    # the context manager closes the pipe and waits for ffmpeg on every exit
    # path, so no child process or file descriptor is leaked per video
    with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) as process:
        while True:
            if utils.terminated_requested:
                # stop decoding early instead of finishing the whole video
                process.terminate()
                break
            data = process.stdout.read(4000)
            if not data:
                break
            if rec.AcceptWaveform(data):
                results.append(rec.Result())

    # Never save a partial transcript: an existing .vtt file is treated as
    # "already done" above, so a truncated one would never be regenerated.
    if utils.terminated_requested:
        print('terminate requested, discarding partial transcript of ' + video_path)
        break
    if process.returncode != 0:
        print("skipping " + video_path + " since ffmpeg exited with code " + str(process.returncode))
        continue
    results.append(rec.FinalResult())

    # convert to standard format: each recognizer result is a JSON document
    # whose optional 'result' entry lists words with 'start', 'end' and 'word'
    vtt = WebVTT()
    for res in results:
        for word in json.loads(res).get('result') or []:
            start = utils.webvtt_time_string(word['start'])
            end = utils.webvtt_time_string(word['end'])
            vtt.captions.append(Caption(start, end, word['word']))
    vtt.save(file_path_webvtt)
    print("done in " + str(time.time() - t0) + " seconds\n")