-
Notifications
You must be signed in to change notification settings - Fork 0
/
TestMain.py
executable file
·250 lines (214 loc) · 9.04 KB
/
TestMain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# coding: utf-8
# ## The Main file to test the models
# In[1]:
import os
from keras.models import Model, load_model
#import audioUtils
import numpy as np
import librosa
import SpeechModels
#from kapre.time_frequency import Melspectrogram, Spectrogram
#from kapre.utils import Normalization2D
from keras.layers import Add, Dropout, BatchNormalization, Conv2D, Reshape, MaxPooling2D, Dense, LSTM, Bidirectional
# In[2]:
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
#np.set_printoptions(threshold=np.nan)
#Load the CNN model
modelRootDict = "trained_models"
testRootDict = "../DysSpeech_Corpora/Linzy/linzyCut/test/"
_cnn_model_path = os.path.join(".",modelRootDict,"CNN2_model.h5")
#_rnn_model_path = os.path.join(".",modelRootDict,"AttRNN_model.h5")
_dnn_model_path = os.path.join(".",modelRootDict,"DNN_model.h5") #DNN_model
#_custom_objects={'Melspectrogram':Melspectrogram(),'Normalization2D':Normalization2D(int_axis=0)}
global theCNNModel
def __load_model(modelPath):
#theModel = load_model(modelPath,_custom_objects)
theModel = load_model(modelPath)
return theModel
#DYSCmdCategsDigit = {0 : 'unknown', 1:'one', 2:'two', 3:'three', 4:'four', 5:'five', 6:'six', 7:'seven', 8:'eight', 9:'nine', 10:'close', 11:'up',
# 12:'down', 13:'previous', 14:'next', 15:'in', 16:'out', 17:'left', 18:'right', 19:'home'}
DYSCmdCategsDigit = {0:'one', 1:'two', 2:'three', 3:'four', 4:'five', 5:'six', 6:'seven', 7:'eight', 8:'nine', 9:'close', 10:'up',
11:'down', 12:'previous', 13:'next', 14:'in', 15:'out', 16:'left', 17:'right', 18:'home'}
DYSCmdCategsDigit2 = {1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five', 6: 'six', 7: 'seven', 8: 'eight', 9: 'nine', 10: 'close', 11: 'up',
12: 'down', 13: 'previous', 14: 'next', 15: 'in', 16: 'out', 17: 'left', 18: 'right', 19: 'home'}
# In[3]:
def _read_test_wav(wav_file):
'''
read the wav file and convert wav to numpy array.
'''
#testfile = os.path.join("..",'Linzy', testRootDict, wav_file)
#print("reading the test wav file:",testfile)
print("reading the test wav file:",wav_file)
return librosa.load(wav_file)
'''
def _read_test_npy(npy_file):
curX = np.load(_f)
#check equal,smaller, or bigger
#and truncate or padding
#curX could be bigger or smaller than self.dim
if curX.shape[0] == iLen:
return curX
#print('Same dim')
elif curX.shape[0] > iLen: #bigger#we can choose any position in curX-self.dim
randPos = np.random.randint(curX.shape[0]-iLen)
curX = curX[randPos:randPos+iLen]
#print('File dim bigger')
else: #smaller
randPos = np.random.randint(iLen-curX.shape[0])
curX[i,randPos:randPos+curX.shape[0]] = curX
return curX
'''
# In[4]:
def _adjustShape(rawArray):
X = np.empty((1,16000))
if rawArray[0] == 16000:
X = rawArray
#print('Same dim')
elif rawArray.shape[0] > 16000: #bigger
#we can choose any position in curX-self.dim
randPos = np.random.randint(rawArray.shape[0]-16000)
X = rawArray[randPos:randPos+16000]
#print('File dim bigger')
else: #smaller
randPos = np.random.randint(16000-rawArray.shape[0])
X[i,randPos:randPos+rawArray.shape[0]] = rawArray
#print('File dim smaller')
return X
def _adjustNPYShape(rawArray):
X = np.empty(16000)
len_of_rawArray = len(rawArray)
print("length of rawArray is : ",len_of_rawArray)
if len_of_rawArray == 16000:
X = rawArray
#print('Same dim')
elif len_of_rawArray > 16000: #bigger
#we can choose any position in curX-self.dim
randPos = np.random.randint(len_of_rawArray-16000)
X = rawArray[randPos:randPos+16000]
#print('File dim bigger')
else: #smaller
randPos = np.random.randint(16000-len_of_rawArray)
print("smaller part - randPos : ",randPos)
print("rawArray[0] : ",rawArray[0])
X[0:len_of_rawArray] = rawArray
#print('File dim smaller')
return X
def main2():
#print("Loading the Model...........")
#__theDNNModel = __load_model(_dnn_model_path)
print("Reading the test npy file.........")
#y_npy = np.load("../linzyCut/test/LinZY03_13_6_previous.wav.npy")
y_wav = _read_test_wav("../linzyCut/test/LinZY03_13_6_previous.wav")
print("y_npy's class name: ",y_npy.__class__.__name__)
print("y_wav's class name: ",y_wav.__class__.__name__)
print("y_npy's shape:",y_npy.shape)
print("y_wav length ",len(y_wav))
print("y_wav[0] type: ",y_wav[0].__class__.__name__)
print("length of y_wav[0] : ",len(y_wav[0]))
def main3():
__theCNNModel = __load_model(_cnn_model_path)
filepath_ = testRootDict + "LinZY03_07_6.wav"
raw_res, predict_res = predict(filepath_,__theCNNModel)
#print(DYSCmdCategsDigit2)
print("predict_result is ", predict_res)
y_result_list = list(raw_res[0])
print("y_result is ", raw_res)
maxValue = max(y_result_list)
idxOfMaxValue = y_result_list.index(maxValue)
print("max value is {}, and at position {}".format(maxValue, idxOfMaxValue))
if idxOfMaxValue == 1:
print("get hit")
print("====================================")
for idx in range(19):
print("y_result {0} element is: {1:6f} ==== {2}".format(
idx+1, raw_res[0, idx], DYSCmdCategsDigit2.get(idx+1)))
feature_dim_1 = 20
feature_dim_2 = 11
channel = 1
def predict(filepath, model):
sample = wav2mfcc(filepath)
sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
rawres = model.predict(sample_reshaped)
return rawres, np.argmax(rawres)
'''
return get_labels()[0][
np.argmax(model.predict(sample_reshaped))
]
'''
def wav2mfcc(file_path, max_len=11):
wave, sr = librosa.load(file_path, mono=True, sr=None)
wave = wave[::3]
mfcc = librosa.feature.mfcc(wave, sr=16000)
# If maximum length exceeds mfcc lengths then pad the remaining ones
if (max_len > mfcc.shape[1]):
pad_width = max_len - mfcc.shape[1]
mfcc = np.pad(mfcc, pad_width=(
(0, 0), (0, pad_width)), mode='constant')
# Else cutoff the remaining parts
else:
mfcc = mfcc[:, :max_len]
return mfcc
def main():
print("Loading the Model...........")
__theCNNModel = __load_model(_cnn_model_path)
#__theDNNModel = __load_model(_dnn_model_path)
#__theRNNModel = __load_model(_rnn_model_path)
print("Reading the test wav file.........")
# _read_test_wav("../linzyCut/test/LinZY03_01_6_one.wav")
raw_y, sr = librosa.load("../linzyCut/test/LinZY03_13_6_previous.wav")
len_of_rawy = len(raw_y)
#y_npy = np.load("../linzyCut/test/LinZY03_13_6_previous.wav.npy")
y = _adjustNPYShape(raw_y)#np.transpose(_adjustShape(y))
#print("has values part is {} : ",y[0:len_of_rawy])
#__theCNNModel.summary()
#__theDNNModel.summary()
print("type of y : ", y.__class__.__name__)
print("shape of y is ", y.shape)
input_y = y.reshape(1,16000)
#y_result = __theDNNModel.predict(input_y)
lblNum = 1
y_result = __theCNNModel.predict(input_y)
print("y_result is ",y_result)
y_result_list = list(y_result[0])
maxValue = max(y_result_list)
idxOfMaxValue = y_result_list.index(maxValue)
print("max value is {}, and at position {}".format(maxValue,idxOfMaxValue))
if idxOfMaxValue == lblNum:
print("get hit")
print("====================================")
for idx in range(19):
print("y_result {0} element is: {1:6f} ==== {2}".format(
idx, y_result[0, idx], DYSCmdCategsDigit.get(idx)))
#print("content of input_y : ", input_y)
#print("length is input_y: ",len(input_y))
#print("shape's input_y is ",input_y.shape)
'''
idx2 = 0
lblNum = 1
gethit = 0
totaltry = 10
while idx2 < totaltry:
#y_result = __theDNNModel.predict(input_y)
y_result = __theCNNModel.predict(input_y)
#y_result = __theRNNModel.predict(input_y)
idx2 += 1
y_result_list = list(y_result[0])
maxValue = max(y_result_list)
idxOfMaxValue = y_result_list.index(maxValue)
if idxOfMaxValue == lblNum:
gethit += 1
print("get hit the {}-th time".format(gethit))
print("====================================")
for idx in range(19):
print("y_result {0} element is: {1:6f} ==== {2}".format(idx, y_result[0,idx], DYSCmdCategsDigit.get(idx)))
'''
print("the shape of y_result is : ",y_result.shape)
#print("The total accuracy is {0:6f}".format(gethit/totaltry))
#print("The predicting result is:",y_result)
# In[ ]:
if __name__ == "__main__":
#npary, sample_rate = _read_test_wav("Ho1226_8894_0.wav")
#print("NumPy Arrary of testing file is {0} and sample rate is {1}".format(npary, sample_rate))
#print("Loading the Model...........")
#__load_model(_cnn_model_path)
main3()