-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExtraTrees.py
82 lines (59 loc) · 3.2 KB
/
ExtraTrees.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# imports
from sklearn.metrics import log_loss
import csv
import pandas as pd
import numpy as np
from numpy import save
from numpy import load
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import ExtraTreesClassifier
# read features files
# Label matrix first, then the train/test feature matrices, all stored as .npy.
y_train = np.load('features/train_y.npy')
features_train = np.load('features/features_train.npy')
features_test = np.load('features/features_test.npy')
# X_* are the names the rest of the script works with; they alias the loaded
# arrays (no copy is made).
X_train, X_test = features_train, features_test
# 5-fold cross validation setup
#
# The rows are cut, in their stored order (no shuffling), into five contiguous
# folds at the boundaries n*k//5 for k = 0..5. Fold k is held out and every
# other row is used for fitting.
N_FOLDS = 5


def _contiguous_fold_indices(n_samples, n_folds=N_FOLDS):
    """Build row indices for contiguous, unshuffled cross-validation folds.

    Args:
        n_samples: Total number of rows to partition.
        n_folds: Number of folds to cut the rows into.

    Returns:
        A pair ``(fit_rows, held_out_rows)`` of 1-D object arrays of length
        ``n_folds``. Element ``k`` of ``fit_rows`` is an integer index array
        with every row outside fold ``k``; element ``k`` of ``held_out_rows``
        is an integer index array with the rows of fold ``k``.
    """
    # Integer arithmetic gives the same cut points as int(n * k / n_folds)
    # without a round-trip through float.
    bounds = [n_samples * k // n_folds for k in range(n_folds + 1)]
    all_rows = np.arange(n_samples)
    # The folds have different lengths whenever n_samples is not a multiple of
    # n_folds. np.array() on such ragged lists raises ValueError on
    # NumPy >= 1.24, so the containers are allocated as object arrays and
    # filled one fold at a time. `.shape[0]` and `[k]` keep working as before.
    fit_rows = np.empty(n_folds, dtype=object)
    held_out_rows = np.empty(n_folds, dtype=object)
    for k in range(n_folds):
        start, stop = bounds[k], bounds[k + 1]
        held_out_rows[k] = all_rows[start:stop]
        fit_rows[k] = np.concatenate((all_rows[:start], all_rows[stop:]))
    return fit_rows, held_out_rows


# The same indices are applied to both X_train and y_train, so a row-count
# mismatch would silently pair features with the wrong labels.
if X_train.shape[0] != y_train.shape[0]:
    raise ValueError(
        "X_train has %d rows but y_train has %d"
        % (X_train.shape[0], y_train.shape[0])
    )

cross_validation_train, cross_validation_test = _contiguous_fold_indices(
    X_train.shape[0]
)
# model training, with cross validation
def _second_class_proba(fitted, samples):
    """Return index 1 of the transposed, stacked ``predict_proba`` output.

    ``predict_proba`` of a MultiOutputClassifier yields one array per output;
    stacking and transposing them puts the class axis first, so ``[1]`` picks
    the second class for every (sample, output) pair. This presumes every
    output exposes the same number of classes -- TODO confirm for each fold.
    """
    return np.array(fitted.predict_proba(samples)).T[1]


# One model is fitted per fold. Three prediction sets are accumulated:
#   svc_results        held-out predictions, stacked fold after fold
#   svc_results_test   running mean over folds of the X_test predictions
#   svc_results_train  running mean over folds of the full X_train predictions
# NOTE(review): the "svc_" prefix looks like a leftover from another script;
# the values here come from ExtraTrees.
n_folds = cross_validation_train.shape[0]
for j in range(n_folds):
    fit_rows = cross_validation_train[j]
    held_out_rows = cross_validation_test[j]

    model = ExtraTreesClassifier(n_estimators=500, random_state=0, max_features=100)
    clf = MultiOutputClassifier(model).fit(X_train[fit_rows], y_train[fit_rows])

    y_pred = _second_class_proba(clf, X_train[held_out_rows])
    y_pred_test = _second_class_proba(clf, X_test)
    y_pred_train = _second_class_proba(clf, X_train)

    if j != 0:
        # Append this fold's held-out rows below the earlier ones.
        svc_results = np.vstack((svc_results, y_pred))
        # Incremental mean: the previous mean covers j folds.
        svc_results_test = (svc_results_test * j + y_pred_test) / (j + 1)
        svc_results_train = (svc_results_train * j + y_pred_train) / (j + 1)
    else:
        # First fold seeds the accumulators.
        svc_results = y_pred
        svc_results_test = y_pred_test
        svc_results_train = y_pred_train

    print("iteration : " + str(j) + " Finished")
# log loss calculation
def _mean_column_log_loss(y_true, y_prob):
    """Return the log loss averaged over every label column.

    Args:
        y_true: 2-D array of true labels, one column per output.
        y_prob: 2-D array of predicted probabilities, indexed by the same
            rows and columns as ``y_true``.

    Returns:
        The unweighted mean of the per-column ``log_loss`` values.
    """
    # Take the number of outputs from the data, not a fixed count, so every
    # label column is scored whatever the width of y_true.
    n_outputs = y_true.shape[1]
    per_column = [
        log_loss(y_true[:, col], y_prob[:, col]) for col in range(n_outputs)
    ]
    return sum(per_column) / n_outputs


# Held-out (cross-validated) predictions against the true labels.
print("CV Training loss : " + str(_mean_column_log_loss(y_train, svc_results)))
# Fold-averaged predictions on the full training set against the true labels.
print("Training loss : " + str(_mean_column_log_loss(y_train, svc_results_train)))
# save results
# Write the stacked held-out predictions and the fold-averaged test
# predictions as .npy files.
for out_path, predictions in (
    ("features/TREE_features_train.npy", svc_results),
    ("features/TREE_features_test.npy", svc_results_test),
):
    np.save(out_path, predictions)