-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_audio_tape_lengths.py
149 lines (137 loc) · 4.1 KB
/
extract_audio_tape_lengths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
import datetime
import csv
# --- Input location -------------------------------------------------------
# Transcripts are looked up under <path_base>/<corpus entry>/<ending>.
path_base = "HSLLD"
corpus = "HV1 HV2 HV3 HV5 HV7".split()
ending = "MT"

# --- Result accumulators, filled while the transcripts are scanned --------
# times_and_files: (file_path, [timedelta, ...]) for files with >= 2 times
# not_two:         ([timedelta, ...], file_path) for files with < 2 times
# not_found:       file paths that contain no "Time is" comment at all
times_and_files, not_two, not_found = [], [], []

# File names of a hand-picked subset; only referenced by the commented-out
# "JUST SOLUTIONS" input mode elsewhere in this script.
solution_file_name_endings = [
    "admmt1.cha",
    "aimmt1.cha",
    "allmt1.cha",
    "anamt1.cha",
    "annmt1.cha",
    "aprmt1.cha",
    "bramt1.cha",
    "brimt1.cha",
    "brnmt1.cha",
    "brtmt1.cha",
    "casmt1.cha",
    "conmt1.cha",
    "davmt1.cha",
    "diamt1.cha",
    "emimt1.cha",
    "ethmt1.cha",
    "geomt1.cha",
    "gilmt1.cha",
    "gremt1.cha",
    "guymt1.cha",
    "jacmt1.cha",
]
# Build the list of transcript paths that the rest of the script analyses.
file_paths = []

# -- ALL FILES --
# Every entry of <path_base>/<corp>/<ending> for each configured corpus.
for corp in corpus:
    dir_path = "/".join([path_base, corp, ending])
    # os.listdir() returns names in arbitrary order; sorting makes the
    # console output and the CSV row order reproducible between runs.
    for file_name in sorted(os.listdir(dir_path)):
        file_paths.append(dir_path + '/' + file_name)

# -- JUST SOLUTIONS --
# Alternative input set: uncomment (and disable the loop above) to analyse
# only the hand-picked HV1 files.
# for solution_name in solution_file_name_endings:
#     file_paths.append("HSLLD/HV1/MT/" + solution_name)
# Scan each transcript for its '@Comment:<TAB>Time is ...' lines and parse
# the last whitespace-separated token of each as "minutes:seconds".
#   * no such line at all            -> not_found
#   * fewer than two parseable times -> not_two
#   * otherwise                      -> times_and_files
for file_path in file_paths:
    with open(file_path) as transcript:
        time_lines = [
            text for text in transcript
            if text.startswith("@Comment:\tTime is")
        ]

    if not time_lines:
        print("comment not found: {}".format(file_path))
        not_found.append(file_path)
        continue

    stamps = []
    for time_line in time_lines:
        clock = time_line.split()[-1]
        try:
            # Raises ValueError unless the token is exactly "<int>:<int>".
            minute_str, second_str = clock.split(':')
            stamp = datetime.timedelta(
                minutes=int(minute_str), seconds=int(second_str)
            )
        except ValueError:
            # Unparseable time: report it and carry on with the next line.
            print("ValueError")
            print(time_line)
        else:
            stamps.append(stamp)

    if len(stamps) >= 2:
        times_and_files.append((file_path, stamps))
    else:
        # A duration needs a start and an end time; log what was found.
        print(file_path)
        print(time_lines)
        print(stamps)
        not_two.append((stamps, file_path))
# Summary statistics over the files that yielded at least two time stamps.
print("number of files with times:")
print(len(times_and_files))

# One list of time stamps per file. A comprehension is used instead of
# zip(*...)[1]: zip() is not subscriptable on Python 3, and indexing its
# result fails when times_and_files is empty.
times = [stamps for _, stamps in times_and_files]
if times:
    print(times[0])

# Length of each recording in seconds: last stamp minus first stamp; any
# stamps in between are ignored.
seconds = [(stamps[-1] - stamps[0]).total_seconds() for stamps in times]
print(seconds)

# Pair every (file_path, stamps) entry with its length and keep only the
# recordings longer than 24 minutes (60 * 24 seconds).
files_and_times_and_seconds = list(zip(times_and_files, seconds))
more_than_24 = [
    pair for pair in files_and_times_and_seconds if pair[1] > 60 * 24
]
print('getting secodns')
just_seconds = [length for _, length in more_than_24]
print(just_seconds)
print(more_than_24)
print(len(just_seconds))
if just_seconds:
    avg = sum(just_seconds) / float(len(just_seconds))
    print(avg)
    print("average")
    print(avg / 60)
else:
    # Nothing passed the filter, so there is no average to report
    # (dividing by len(just_seconds) would raise ZeroDivisionError).
    print("no recordings longer than 24 minutes")

# The loop variables deliberately avoid the name `seconds`, so the
# module-level list of lengths stays intact for any later code that
# indexes it.
for file_entry, _length in more_than_24:
    print(file_entry[0])
print('finished')
# print(max(seconds))
# i = seconds.index(max(seconds))
# print(times_and_files[i])
# print(min(seconds))
# i = seconds.index(min(seconds))
# print(times_and_files[i])
# for x in two_comments:
# print(x[0])
# print(x[1])
# print(len(two_comments))
# Report the files that were left out of the statistics: first those with
# no time comment at all, then those with fewer than two parsed times.
# Each group is printed as heading, contents, count.
for heading, skipped in (
    ("not found:", not_found),
    ("not two comments:", not_two),
):
    print(heading)
    print(skipped)
    print(len(skipped))
# Export the results as CSV: one row per measured file (path, whole seconds,
# empty comment), followed by one row per file that had fewer than two
# parsed time stamps.
# NOTE(review): files listed in not_found are not written at all - confirm
# that this is intended.
# NOTE(review): the mode is left as "w" so the script runs on Python 2 and 3;
# on Python 3 the csv documentation recommends open(..., newline='').
with open("extract_file_names_results.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerow(["fileName", "total seconds", "comments"])

    # The duration is computed from each entry's own time stamps (last minus
    # first), so a row never depends on another module-level list staying
    # intact and index-aligned with times_and_files.
    for file_name, stamps in times_and_files:
        duration = stamps[-1] - stamps[0]
        writer.writerow([file_name, int(duration.total_seconds()), ''])

    for _, file_path in not_two:
        writer.writerow([file_path, "", "time not found:"])