from typing import Dict
import itertools

import editdistance
import tensorflow as tf

from testExecutor import TestExecutor
from speechModel import SpeechModel

class TestStatistics:
    """Accumulates per-utterance and global edit-distance statistics."""

    def __init__(self):
        self.decodings_counter = 0
        self.sum_letter_edit_distance = 0
        self.sum_letter_error_rate = 0
        self.sum_word_edit_distance = 0
        self.sum_word_error_rate = 0
        self.letter_edit_distance = 0
        self.letter_error_rate = 0
        self.word_edit_distance = 0
        self.word_error_rate = 0

    def track_decoding(self, decoded_str, expected_str):
        """
        Uses editdistance to compute the letter edit distance (LED) between
        the decoded string and the expected transcript, i.e. how many letters
        were incorrectly predicted. The same is done at the word level to
        obtain the word edit distance (WED).
        The letter error rate (LER) is the LED divided by the length of the
        expected transcript; the word error rate (WER) is the WED divided by
        the number of words in the expected transcript.
        """
        self.letter_edit_distance = editdistance.eval(expected_str, decoded_str)
        self.letter_error_rate = self.letter_edit_distance / len(expected_str)
        self.word_edit_distance = editdistance.eval(expected_str.split(), decoded_str.split())
        self.word_error_rate = self.word_edit_distance / len(expected_str.split())
        self.sum_letter_edit_distance += self.letter_edit_distance
        self.sum_letter_error_rate += self.letter_error_rate
        self.sum_word_edit_distance += self.word_edit_distance
        self.sum_word_error_rate += self.word_error_rate
        self.decodings_counter += 1

    @property
    def global_letter_edit_distance(self):
        return self.sum_letter_edit_distance / self.decodings_counter

    @property
    def global_letter_error_rate(self):
        return self.sum_letter_error_rate / self.decodings_counter

    @property
    def global_word_edit_distance(self):
        return self.sum_word_edit_distance / self.decodings_counter

    @property
    def global_word_error_rate(self):
        return self.sum_word_error_rate / self.decodings_counter
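
# Worked example for TestStatistics (a minimal sketch with hypothetical
# transcripts; it only assumes the editdistance package is installed):
#
#   stats = TestStatistics()
#   stats.track_decoding('hallo world', 'hello world')  # LED 1, WER 0.5
#   stats.track_decoding('hello world', 'hello world')  # LED 0, WER 0.0
#
# The global_* properties average over all tracked utterances, so
# stats.global_word_error_rate would be (0.5 + 0.0) / 2 = 0.25.
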
class Test(TestExecutor):

    def create_sample_generator(self, limit_count: int):
        return self.reader.load_samples('test', loop_infinitely=False, limit_count=limit_count, feature_type='power')

    def get_loader_limit_count(self):
        # No sample limit: evaluate on the complete test set
        return 0

    def get_max_steps(self):
        # No step limit: iterate until the input pipeline is exhausted
        return None

    def run(self):
        stats = TestStatistics()
        with tf.Session() as sess:
            model = self.create_model(sess)
            coord = self.start_pipeline(sess)
            try:
                print('Testing on sample audio...')
                # Iterate until the coordinator stops or the data set runs out
                for step in itertools.count():
                    if coord.should_stop():
                        break
                    # Only write a TensorBoard summary for the first step
                    should_save = step == 0
                    self.run_step(model, sess, stats, should_save)
            except tf.errors.OutOfRangeError:
                print('Testing is done.')
            finally:
                coord.request_stop()
                self.print_global_statistics(stats)
                coord.join()
    @staticmethod
    def print_global_statistics(stats):
        """
        Prints the final results as the global LED and WED, i.e. the average
        number of incorrectly predicted letters and words per utterance.
        """
        print('Final Results')
        print('LED: {} WED: {}'.format(stats.global_letter_edit_distance, stats.global_word_edit_distance))
    def run_step(self, model: SpeechModel, sess: tf.Session, stats: TestStatistics, save: bool,
                 verbose=True, feed_dict: Dict = None):
        """
        Runs the model on one batch without updating the weights, decodes the
        predictions and compares them against the expected transcript,
        tracking LED/WED statistics for every decoded path. If `save` is set,
        a TensorBoard summary is written as well.
        """
        global_step = model.global_step.eval()
        if save:
            # Evaluate on the test batch and write the summary
            avg_loss, decoded, label, summary = model.step(sess, update=False, decode=True,
                                                           return_label=True, summary=True, feed_dict=feed_dict)
            model.summary_writer.add_summary(summary, global_step)
        else:
            # Evaluate only; no summary needs to be written
            avg_loss, decoded, label = model.step(sess, update=False, decode=True,
                                                  return_label=True, feed_dict=feed_dict)

        decoded_ids_paths = [Test.extract_decoded_ids(path) for path in decoded]
        for label_ids in Test.extract_decoded_ids(label):
            expected_str = self.idsToSentence(label_ids)
            # Print the actual transcript together with the decoded (predicted)
            # text, plus the LED and WED, so we can see how many letters and
            # words were incorrectly predicted.
            if verbose:
                print('Actual: {}'.format(expected_str))
            for decoded_path in decoded_ids_paths:
                decoded_ids = next(decoded_path)
                decoded_str = self.idsToSentence(decoded_ids)
                stats.track_decoding(decoded_str, expected_str)
                if verbose:
                    print('Predicted: {}'.format(decoded_str))
                    print('LED: {} WED: {}'.format(stats.letter_edit_distance, stats.word_edit_distance))
    @staticmethod
    def extract_decoded_ids(sparse_tensor):
        """Yields the id sequence of each batch entry in a decoded sparse tensor."""
        ids = []
        last_batch_id = 0
        for i, index in enumerate(sparse_tensor.indices):
            batch_id, char_id = index
            if batch_id > last_batch_id:
                # A new batch entry starts; emit the ids collected so far
                yield ids
                ids = []
                last_batch_id = batch_id
            ids.append(sparse_tensor.values[i])
        yield ids
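
    # For illustration (hypothetical values): a decoded sparse tensor with
    # indices [[0, 0], [0, 1], [1, 0]] and values [7, 4, 11] describes two
    # batch entries, so extract_decoded_ids yields [7, 4] and then [11].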

    def idsToSentence(self, identifiers):
        return ''.join(self.idToLetter(identifier) for identifier in identifiers)

    def idToLetter(self, identifier):
        # ids 0-25 map to 'a'-'z', 26 to the apostrophe and 27 to the space
        if identifier == 27:
            return ' '
        if identifier == 26:
            return '\''
        return chr(identifier + ord('a'))
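

if __name__ == '__main__':
    # Minimal smoke test (hypothetical strings): exercises only the statistics
    # bookkeeping above. It assumes the module's imports resolve and the
    # editdistance package is installed; the model and input pipeline are not
    # touched here.
    demo_stats = TestStatistics()
    demo_stats.track_decoding('hallo world', 'hello world')
    print('LED: {} LER: {:.2f} WED: {} WER: {:.2f}'.format(
        demo_stats.letter_edit_distance, demo_stats.letter_error_rate,
        demo_stats.word_edit_distance, demo_stats.word_error_rate))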