forked from maglab-uconn/EARShot_TF2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scr_train.py
95 lines (77 loc) · 3.77 KB
/
scr_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import contextlib

import pandas as pd
import tensorflow as tf

from earshot.audio import *
from earshot.data import *
from earshot.model import EARSHOT
from earshot.parameters import ModelParameters
# Count the physical GPUs visible to TensorFlow; the effective batch size
# below is scaled by this device count.
num_gpus = len(tf.config.experimental.list_physical_devices('GPU'))
print("Number of GPUs Available: ", num_gpus)
# Batch size per device.
per_gpu_batch = 2048
# Global batch scales with the GPU count; CPU-only runs use a single batch.
total_batch = per_gpu_batch * max(num_gpus, 1)
# Build a manifest of the audio files under the given folder.
# NB: this path should be set in one of the 'parameters' objects
train_manifest = Manifest('/Users/kevinbrown/data/language/speech/WAV/EARSHOT_NEW_WAVS')
# Attach sparse random vector (SRV) targets, one per word.
# NB: this should be set in one of the 'parameters' objects
train_manifest.generate_srvs(target='Word', target_len=300, target_on=30)
# Load the ~1000-word pronunciation table, then draw a 100-word sample
# from it for this run.
# NB: this should be set in one of the parameters objects
subset = pd.read_csv('Pronunciation_Data_1K.Modified.txt', sep='\t', names=['Word','Pronounce'])
sampled_words = subset.sample(n=100)
# Pre-compute spectrograms for the sampled words and cache them to disk.
training_1k_df = train_manifest.calc_spec(subset=sampled_words)
training_1k_df.to_pickle('./earshot/tests/training-test.plk')
# NOTE(review): alternative flow that reloads the cached spectrogram pickle
# instead of recomputing it. It was parked in a bare module-level triple-quoted
# string (a no-op expression that still allocates the string and reads like
# live code); converted to real comments. Re-enable by uncommenting.
# try:
#     training_1k_df = pd.read_pickle('./earshot/tests/training-test.plk')
# except:
#     print('No training pickle found, computing set.')
#     training_1k_df = train_manifest.calc_spec(subset=subset) # pre-compute spectrograms
#     training_1k_df.to_pickle('./earshot/tests/training-test.plk')
# # generate category dictionary, only needed for analysis
# # train_manifest.generate_category_dict()
# Model hyper-parameters.
p = ModelParameters()
# Early-stopping callback (disabled); re-enable to halt on a loss plateau.
# es = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.003, patience=250) # stop early if no significant change in loss
# Checkpoint path template; the epoch number is filled in at save time.
# NB: this is probably something that should come from the parameters object
ckpt_path = './earshot/checkpoints/cp-{epoch:05d}.ckpt'
# Checkpoint the model every 250 epochs.
# NOTE(review): 'period' is deprecated in TF2 in favour of save_freq, but an
# integer save_freq counts batches rather than epochs — confirm before swapping.
# NB: save frequency should be set in a parameter (along with checkpoint location)
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=ckpt_path, monitor='loss',
                                                verbose=1, save_freq='epoch', period=250)
# Append per-epoch training metrics to a CSV log.
csv_logger = tf.keras.callbacks.CSVLogger('./earshot/model_history_log.csv', append=True)
# Build and compile the model, under a MirroredStrategy scope when more than
# one GPU is available so variables are replicated across devices.
# Fixes: (1) '.iloc()[0]' used pandas' deprecated callable-indexer form —
# replaced with the standard '.iloc[0]'; (2) the model construction/compile
# lines were duplicated in both branches — deduplicated via a null context.
if num_gpus > 1:
    strategy = tf.distribute.MirroredStrategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
    scope = strategy.scope()
else:
    # Single-device (or CPU) run: no distribution scope needed.
    scope = contextlib.nullcontext()
# Sub-classed model needs the target-vector length and the parameters object.
target_len = len(training_1k_df['Target'].iloc[0])
with scope:
    model = EARSHOT(target_len, p)
    model.compile(loss=model.loss, optimizer=model.optimizer, run_eagerly=False)
# Train the model.
# NB: the epoch count should also come from a parameters structure.
inputs = np.array(training_1k_df['Padded Input'].tolist())
targets = np.array(training_1k_df['Padded Target'].tolist())
model.fit(
    x=inputs,
    y=targets,
    batch_size=total_batch,
    epochs=5,
    shuffle=True,
    callbacks=[checkpoint, csv_logger, model.lr_sched],
)
# # Prediction example
# # have to pass talker within a list ['JUNIOR'], but can assign multiple via ['ALLISON', 'JUNIOR', etc.]
# predict_df = train_manifest.gen_predict(['JUNIOR'], subset=subset)
# cosine_sims = Prediction(model, predict_df, training_1k_df)
# cosine_sims.cosine_sim_dict.keys()
# cosine_sims.plot_category_cosine('LARD', train_manifest.category_dict)