model_trainer.py
"""Train a logistic-regression classifier on a pool of chroma datasets and
serialize it with joblib."""
import joblib
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

import dataset
if __name__ == '__main__':
    clf_name = 'pre-trained'
    # Datasets to pool into a single training set; the symbolic variants are
    # commented out so that only chroma features are used.
    dataset_filenames = [
        'a-maps_chroma',
        # 'a-maps_symbolic',
        'albrecht_chroma',
        # 'albrecht_symbolic',
        'arthur2018-recordings_chroma',
        'arthur2018-synth-shortname_chroma',
        # 'arthur2018-synth-shortname_symbolic',
        'billboard_chroma',
        'cross-era-shortname_chroma',
        'giantsteps-key_chroma',
        'giantsteps-mtg-key_chroma',
        'napoles2018_chroma',
        # 'napoles2018_symbolic',
    ]
    # Concatenate every dataset into one feature matrix and label vector,
    # refusing to mix datasets whose `ensemble` attribute differs.
    for idx, d in enumerate(dataset_filenames):
        data = dataset.Dataset(d)
        if idx == 0:
            ensemble = data.ensemble
            fulldataset_X = data.X
            fulldataset_y = data.y
        else:
            if data.ensemble != ensemble:
                raise SystemExit(
                    'Dataset {} is incompatible with the other datasets'
                    .format(d))
            fulldataset_X = np.concatenate((fulldataset_X, data.X))
            fulldataset_y = np.concatenate((fulldataset_y, data.y))
    clf = LogisticRegression(
        penalty='l2',
        fit_intercept=False,
        dual=False,
        C=0.7,
        solver='lbfgs',
        multi_class='auto',
        max_iter=100000,
    )
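    # SVC is imported above but never used here; swapping it in would be a
    # one-line change, e.g. `clf = SVC(C=0.7)` (those hyperparameters are
    # illustrative only, not taken from this script).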
    # Feature scaling on all the feature vectors
    fulldataset_X = dataset.feature_scaling(fulldataset_X)
    # Data augmentation on the pooled training set
    fulldataset_X, fulldataset_y = dataset.data_augmentation(
        fulldataset_X,
        fulldataset_y)
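    # Optional sanity check, sketched here because cross_val_score is
    # imported above but never called: estimate accuracy with 5-fold
    # cross-validation before the final fit.
    scores = cross_val_score(clf, fulldataset_X, fulldataset_y, cv=5)
    print('5-fold CV accuracy: {:.3f} +/- {:.3f}'.format(
        scores.mean(), scores.std()))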
    # Training
    clf.fit(fulldataset_X, fulldataset_y)
    # Save the model
    clf_filename = '{}.joblib'.format(clf_name)
    joblib.dump(clf, clf_filename)
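    # Round-trip check (a sketch, assuming downstream code loads the model
    # the same way): reload the serialized classifier and report its
    # accuracy on the pooled training data.
    reloaded = joblib.load(clf_filename)
    print('Training accuracy after reload: {:.3f}'.format(
        reloaded.score(fulldataset_X, fulldataset_y)))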