Skip to content

Commit

Permalink
Merge pull request #45 from abarton51/musicnet_preprocessing
Browse files Browse the repository at this point in the history
Musicnet preprocessing
  • Loading branch information
abarton51 authored Dec 1, 2023
2 parents cd7748b + 5186d38 commit d5373f8
Show file tree
Hide file tree
Showing 14 changed files with 240 additions and 3 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
27 changes: 27 additions & 0 deletions src/musicNet/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np
import os
import sys
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Train and evaluate two tree-based classifiers on the preprocessed MusicNet
# MIDI features, printing train accuracy then test accuracy for each model.

# Directory holding the preprocessed .npy arrays. The repository-root-relative
# path is kept as the first choice so existing invocations behave exactly as
# before; when it does not exist (script launched from another working
# directory) fall back to the `processed_data` folder next to this file.
path = 'src/musicNet/processed_data'
if not os.path.isdir(path):
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'processed_data')

X_train = np.load(os.path.join(path, 'train_data_midi.npy'))
X_test = np.load(os.path.join(path, 'test_data_midi.npy'))
y_train = np.load(os.path.join(path, 'train_labels_midi.npy'))
y_test = np.load(os.path.join(path, 'test_labels_midi.npy'))

# Fixed seeds keep both models reproducible between runs.
dt_clf = DecisionTreeClassifier(random_state=42)
# NOTE(review): max_features=512 presumes the feature matrix has at least
# 512 columns - confirm against the preprocessing output.
rf_clf = RandomForestClassifier(random_state=42, max_features=512, n_estimators=100)

# Same fit/score/report sequence for each model; output order is unchanged:
# decision tree (train, test) followed by random forest (train, test).
for clf in (dt_clf, rf_clf):
    clf.fit(X_train, y_train)
    training_accuracy = clf.score(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    print(training_accuracy)
    print(accuracy)
10 changes: 7 additions & 3 deletions src/musicNet/musicnet4641.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,15 @@ def create_id_dict(df):
id_dict[composers[i]] = df.loc[df['composer'].values == composers[i]]['id'].values
return id_dict

def trim_data_dict(data_dict, min_len=20, is_np_array=False):
    """Return a new dict keeping only entries with at least ``min_len`` items.

    Args:
        data_dict: Mapping from a key (the callers use composer names) to a
            sized collection. It is not modified.
        min_len: Minimum size an entry needs in order to be kept.
        is_np_array: When true, an entry's size is read from ``value.shape[0]``
            instead of ``len(value)``.

    Returns:
        A new dict containing the qualifying entries, in the original
        iteration order. Values are the same objects as in ``data_dict``
        (no copies are made).
    """
    # Choose the size measure once instead of re-testing the flag per entry.
    if is_np_array:
        def size_of(value):
            return value.shape[0]
    else:
        size_of = len

    return {
        composer: value
        for composer, value in data_dict.items()
        if size_of(value) >= min_len
    }

def rand_id_sample(dict):
Expand Down
Binary file added src/musicNet/processed_data/test_data_midi.npy
Binary file not shown.
Binary file added src/musicNet/processed_data/test_labels_midi.npy
Binary file not shown.
Binary file added src/musicNet/processed_data/train_data_midi.npy
Binary file not shown.
Binary file added src/musicNet/processed_data/train_labels_midi.npy
Binary file not shown.
206 changes: 206 additions & 0 deletions tabs/final_report.md

Large diffs are not rendered by default.

0 comments on commit d5373f8

Please sign in to comment.