-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_features.py
76 lines (74 loc) · 4.31 KB
/
generate_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from config import config
import os
import librosa
import numpy as np
from tqdm import tqdm
import h5py
def _mfcc(y, sr):
    """Compute 64-coefficient MFCCs with a 25 ms window and 10 ms hop.

    Returns an array of shape (64, n_frames) — frames are on axis 1.
    """
    return librosa.feature.mfcc(y=y, sr=sr, n_mfcc=64,
                                n_fft=int(sr * 0.025), hop_length=int(sr * 0.010))


def _stack(acc, chunk):
    """Append `chunk` below `acc` (np.vstack), handling the initial empty accumulator."""
    return chunk if acc.size == 0 else np.vstack((acc, chunk))


def _segments_with_labels(y, sr, lang_idx):
    """Extract per-frame (features, labels) for one audio signal.

    Voiced intervals found by librosa.effects.split are labelled `lang_idx`;
    leading silence, inter-interval gaps, and trailing silence are labelled -1.

    Returns:
        feat: (n_frames, 64) stacked MFCC frames (empty 1-D array if no intervals).
        label: (n_frames, 1) matching labels.
    """
    intervals = librosa.effects.split(y)
    feat = np.array([])
    label = np.array([])
    for j, (start, end) in enumerate(intervals):
        # Silence before the very first voiced interval.
        if j == 0 and start != 0:
            m = _mfcc(y[:start], sr)
            feat = _stack(feat, m.T)
            label = _stack(label, -1 * np.ones((m.shape[1], 1)))
        # The voiced interval itself: labelled with the language index.
        m = _mfcc(y[start:end], sr)
        feat = _stack(feat, m.T)
        label = _stack(label, lang_idx * np.ones((m.shape[1], 1)))
        # Gap between this interval and the next one (or the end of the file).
        if end != len(y):
            nxt = intervals[j + 1][0] if j + 1 < intervals.shape[0] else len(y)
            m = _mfcc(y[end:nxt], sr)
            feat = _stack(feat, m.T)
            label = _stack(label, -1 * np.ones((m.shape[1], 1)))
    return feat, label


def generate_features(valid=0.2):
    """Build and cache the train/validation MFCC dataset (HDF5).

    Pipeline:
      1. If config['dataset'] already exists, do nothing.
      2. Extract per-frame MFCC features for each language directory under
         config['data_path'] (or load them from the config['dataset_feature']
         cache), grouped into fixed-length sequences of
         config['sequence_length'] frames.
      3. Shuffle each language's sequences reproducibly, split off `valid`
         fraction for validation, and write X_train/y_train/X_val/y_val to
         config['dataset'].

    Args:
        valid: fraction in (0, 1) of each language's sequences held out
            for validation. Defaults to 0.2.
    """
    if os.path.isfile(config['dataset']):
        print('dataset already exists')  # final dataset cached; nothing to do
        return
    print(config['dataset_feature'])
    features = [np.array([]) for _ in range(3)]
    labels = [np.array([]) for _ in range(3)]
    langs = ['train_english', 'train_hindi', 'train_mandarin']
    seq_len = config['sequence_length']
    if not os.path.isfile(config['dataset_feature']):
        for i, lang in enumerate(langs):
            print(f'start process {lang[6:]}')
            files = os.listdir(os.path.join(config['data_path'], lang))
            for f in tqdm(sorted(files)):
                y, sr = librosa.load(os.path.join(config['data_path'], lang, f), sr=16000)
                feat, label = _segments_with_labels(y, sr, i)
                # Truncate to a multiple of seq_len so frames reshape cleanly
                # into full (n, seq_len, 64) sequences.
                rows = (feat.shape[0] // seq_len) * seq_len
                features[i] = _stack(features[i], feat[:rows].reshape((-1, seq_len, 64)))
                labels[i] = _stack(labels[i], label[:rows].reshape((-1, seq_len, 1)))
            print(features[i].shape)
        with h5py.File(config['dataset_feature'], 'w') as hf:
            for i in range(3):
                hf.create_dataset(f'features[{i}]', data=features[i])
                hf.create_dataset(f'labels[{i}]', data=labels[i])
    else:
        print('loading features...')
        with h5py.File(config['dataset_feature'], 'r') as hf:
            for i in range(3):
                print(f'features[{i}]...')
                features[i] = hf[f'features[{i}]'][:]
                labels[i] = hf[f'labels[{i}]'][:]
    # BUG FIX: the original shuffled only in the cache-loading branch, so a
    # fresh extraction produced an unshuffled, sequential train/val split.
    # Shuffle each language the same way regardless of which branch ran.
    np.random.seed(976)  # make the shuffle reproducible
    for i in range(3):
        perm = np.random.permutation(features[i].shape[0])
        features[i] = features[i][perm]
        labels[i] = labels[i][perm]
    # Per-language split point: first `1 - valid` fraction goes to training.
    split = [int((1 - valid) * features[i].shape[0]) for i in range(3)]
    X_train = np.concatenate([features[i][:split[i]] for i in range(3)], axis=0)
    y_train = np.concatenate([labels[i][:split[i]] for i in range(3)], axis=0)
    X_val = np.concatenate([features[i][split[i]:] for i in range(3)], axis=0)
    y_val = np.concatenate([labels[i][split[i]:] for i in range(3)], axis=0)
    print(X_train.shape)
    print(y_train.shape)
    print(X_val.shape)
    print(y_val.shape)
    with h5py.File(config['dataset'], 'w') as hf:
        hf.create_dataset('X_train', data=X_train)
        hf.create_dataset('y_train', data=y_train)
        hf.create_dataset('X_val', data=X_val)
        hf.create_dataset('y_val', data=y_val)
def _main():
    """Script entry point: build the feature dataset with the default split."""
    generate_features()


if __name__ == '__main__':
    _main()