This repository has been archived by the owner on Feb 4, 2025. It is now read-only.

work #52

Merged 2 commits on Dec 5, 2023
Changes from all commits
Binary file added assets/cc_accuracy_path.png
Binary file added assets/dt_cc_path.png
Binary file modified assets/dt_confusion_matrix.png
Binary file modified assets/rf_confusion_mat.png
Binary file added assets/xgboost_model1_confusion_matrix.png
Binary file added assets/xgboost_model2_confusion_matrix.png
Binary file added assets/xgboost_model3_confusion_matrix.png
146 changes: 110 additions & 36 deletions src/musicNet/main.py
@@ -6,6 +6,7 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import f1_score
import xgboost as xgb

path = 'src/musicNet/processed_data'
@@ -28,18 +29,58 @@
labels = ['Bach', 'Beethoven', 'Brahms', 'Mozart', 'Schubert']

dt_clf = DecisionTreeClassifier(random_state=42)

dt_clf.fit(X_train, y_train)
y_pred = dt_clf.predict(X_test)
training_accuracy = dt_clf.score(X_train, y_train)
accuracy = dt_clf.score(X_test, y_test)
print("Decision Tree Classifier")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {accuracy}")
print(f"Test F1-Score: {f1_score(y_test, y_pred, average='weighted')}\n")
print(f"Tree depth: {dt_clf.get_depth()}")

path = dt_clf.cost_complexity_pruning_path(X_train, y_train)
ccp_alphas, impurities = path.ccp_alphas, path.impurities
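# Each effective alpha is the complexity penalty at which the next weakest link is pruned;
# larger alphas produce smaller trees with higher total leaf impurity.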
fig, ax = plt.subplots()
ax.plot(ccp_alphas[:-1], impurities[:-1], marker="o", drawstyle="steps-post")
ax.set_xlabel("Effective alpha")
ax.set_ylabel("Total impurity of leaves")
ax.set_title("Total Impurity vs Effective alpha for training set")
plt.show()
plt.close()
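
# Refit one decision tree per effective alpha so training and test accuracy can be
# compared across pruning strengths.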

dt_clfs1 = []
for ccp_alpha in ccp_alphas:
dt_clf = DecisionTreeClassifier(random_state=42, ccp_alpha=ccp_alpha)
dt_clf.fit(X_train, y_train)
dt_clfs1.append(dt_clf)
print(
"Number of nodes in the last tree is: {} with ccp_alpha: {}".format(
dt_clfs1[-1].tree_.node_count, ccp_alphas[-1]
)
)

train_scores1 = [dt_clf.score(X_train, y_train) for dt_clf in dt_clfs1]
test_scores1 = [dt_clf.score(X_test, y_test) for dt_clf in dt_clfs1]

fig, ax = plt.subplots()
ax.set_xlabel("Alpha")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy vs Alpha for training and testing sets")
ax.plot(ccp_alphas, train_scores1, marker="o", label="train", drawstyle="steps-post")
ax.plot(ccp_alphas, test_scores1, marker="o", label="test", drawstyle="steps-post")
ax.legend()
plt.show()
plt.close()
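
# A possible follow-up (not part of this change, sketched here only): pick the alpha with the
# highest test accuracy and refit a pruned tree with it.
# best_alpha = ccp_alphas[int(np.argmax(test_scores1))]  # assumes `import numpy as np`
# pruned_clf = DecisionTreeClassifier(random_state=42, ccp_alpha=best_alpha).fit(X_train, y_train)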

dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)
ypred = dt_clf.predict(X_test)

confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("Decision Tree Classifier - Confusion Matrix")
plt.show()
plt.close()

@@ -49,82 +90,115 @@
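# ------------- Random Forest ----------------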
rf_clf.fit(X_train, y_train)
training_accuracy = rf_clf.score(X_train, y_train)
accuracy = rf_clf.score(X_test, y_test)
y_pred = rf_clf.predict(X_test)
print("Random Forest Classifier")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {accuracy}")
print(f"Test F1-Score: {f1_score(y_test, y_pred, average='weighted')}")
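# Each tree in the forest can grow to a different depth; report the deepest estimator.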
max_depth = 0
for tree in rf_clf.estimators_:
if max_depth < tree.get_depth():
max_depth = tree.get_depth()
print(f"Maximum depth of Random Forest: {max_depth}\n")

confusion_mat = confusion_matrix(y_test, y_pred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("Random Forest Classifier - Confusion Matrix")
plt.show()
plt.close()

# ------------- XGBoost ----------------
# Training model 1

bst = xgb.XGBClassifier(n_estimators=20, max_depth=15, learning_rate=0.8, objective='multi:softmax', verbosity=2, subsample=0.25)
# fit model
bst.fit(X_train, y_train, verbose=True)
# make predictions
ypred = bst.predict(X_test)
training_accuracy = bst.score(X_train, y_train)
test_accuracy = bst.score(X_test, y_test)

print("XGBoost Classifier - 20 estimators, max_depth of 15, learning rate of 0.8, softmax objective function.")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Test F1-Score: {f1_score(y_test, ypred, average='weighted')}\n")

confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("XGBoost Classifier - Model 1 - Confusion Matrix")
plt.show()
plt.close()
# Model 1 again, this time with a per-round table of training results

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
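# DMatrix is XGBoost's internal data container; wrapping both splits lets xgb.train report
# the metrics listed in eval_metric on the train and eval sets after each boosting round.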

param = {'max_depth': 15, 'eta': 0.8, 'objective': 'multi:softmax'}
param['nthread'] = 4
param['num_class'] = 5
param['subsample'] = 0.25
param['eval_metric'] = ['auc', 'merror']
evallist = [(dtrain, 'train'), (dtest, 'eval')]
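# Early stopping tracks the last metric in eval_metric ('merror', the multiclass error rate)
# on the last dataset in evallist ('eval').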

num_round = 20
bst = xgb.train(param, dtrain, num_round, evals=evallist, early_stopping_rounds=20)
bst.save_model('src\\musicNet\\saved_models\\bt\\austin1.model')
bst.dump_model('src\\musicNet\\saved_models\\bt\\dump.raw.txt')

ypred = bst.predict(dtest)
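# With objective='multi:softmax', Booster.predict returns class indices directly, so ypred
# can be passed straight to confusion_matrix.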
confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("XGBoost Classifier - Model 1 - Confusion Matrix")
plt.show()
plt.close()

# Training model 2

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

dtrain.save_binary('src/musicNet/data/xgboost/train.buffer')
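# Persist the training DMatrix in XGBoost's binary format so it can be reloaded later
# without rebuilding it from the numpy arrays.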

param = {'max_depth': 10, 'eta': 1, 'objective': 'multi:softmax'}
param['nthread'] = 4
param['subsample'] = 0.25
param['num_class'] = 5
param['eval_metric'] = ['auc', 'merror']
evallist = [(dtrain, 'train'), (dtest, 'eval')]

num_round = 10000
bst = xgb.train(param, dtrain, num_round, evals=evallist, early_stopping_rounds=100)
bst.save_model('src\\musicNet\\saved_models\\bt\\austin1.model')
# dump model
bst.dump_model('src\\musicNet\\saved_models\\bt\\dump.raw.txt')
# dump model with feature map
#bst.dump_model('src/musicNet/saved_models/bt/dump.raw.txt', 'src/musicNet/saved_models/bt/featmap.txt')
#xgb.plot_importance(bst)
#xgb.plot_tree(bst, num_trees=2)
#xgb.to_graphviz(bst, num_trees=2)

ypred = bst.predict(dtest)
confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()

plt.title("XGBoost Classifier - Model 2 - Confusion Matrix")
plt.show()
plt.close()

# Repackage model 2 so we can make actual predictions

xgb_clf = xgb.XGBClassifier(**param)
xgb_clf._Booster = bst
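# Note: the fit() call below trains this classifier from scratch using the values in `param`;
# the booster assigned above is replaced rather than reused.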

xgb_clf.fit(X_train, y_train, verbose=True)
# make predictions
ypred = xgb_clf.predict(X_test)
training_accuracy = xgb_clf.score(X_train, y_train)
test_accuracy = xgb_clf.score(X_test, y_test)

print("XGBoost Classifier - Model 3 - max_depth of 10, eta of 1, subsample of 0.25, softmax objective function.")
print(f"Training Accuracy: {training_accuracy}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Test F1-Score: {f1_score(y_test, ypred, average='weighted')}\n")

confusion_mat = confusion_matrix(y_test, ypred)
conf_mat_display = ConfusionMatrixDisplay(confusion_matrix=confusion_mat, display_labels=labels)
conf_mat_display.plot()
plt.title("XGBoost Classifier - Model 3 - Confusion Matrix")
plt.show()
plt.close()
Binary file modified src/musicNet/saved_models/bt/austin1.model