-
Notifications
You must be signed in to change notification settings - Fork 0
/
manual_experiment.py
92 lines (71 loc) · 3.22 KB
/
manual_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from scipy.spatial import distance
from scipy.stats.stats import ttest_ind
from sklearn import preprocessing
from sklearn.ensemble.forest import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from analysis.dataset_utils import ArffLoader
from analysis.experiment import Experiment
from core.centroid_picker import RandomCentroidPicker, AlmostRandomCentroidPicker
from core.ensemble_trainer import EnsembleTrainer
from core.eole import EOLE
from core.exponential_weigher import ExponentialWeigher
__author__ = 'Emanuele Tamponi'
def main():
    """Compare EOLE against a random-forest baseline on one ARFF dataset.

    All settings are hard-coded below. Both models are run through an
    ``Experiment`` with the same loader, fold count and repetition count.
    For each model the last entry, the maximum, and the index of the maximum
    of the mean-accuracy series are printed, followed by the one-sided t-test
    result on the last column of the two accuracy samples.
    """
    dataset = "autos"
    dataset_path = "evaluation/datasets/{}.arff".format(dataset)
    n_experts = 10
    n_inner_experts = 1

    # One inner expert -> a single decision tree per expert; otherwise each
    # expert is itself a forest of ``n_inner_experts`` trees.
    base_estimator = (
        DecisionTreeClassifier(max_features=0.5, max_leaf_nodes=40)
        if n_inner_experts == 1
        else RandomForestClassifier(max_features="auto", n_estimators=n_inner_experts)
    )
    picker = AlmostRandomCentroidPicker(dist_measure=distance.euclidean)
    weigher = ExponentialWeigher(
        precision=1, power=1, dist_measure=distance.euclidean, sample_percent=None
    )

    eole = make_eole(n_experts, base_estimator, picker, weigher)
    rf = make_random_forest(n_experts, n_inner_experts)

    loader = ArffLoader(dataset_path)
    n_folds = 10
    n_repetitions = 10
    eole_experiment = Experiment("{}_eole".format(dataset), eole, loader, n_folds, n_repetitions)
    rf_experiment = Experiment("{}_rf".format(dataset), rf, loader, n_folds, n_repetitions)

    # A single parenthesised argument makes print(...) emit the same text as
    # the print statement, so the output is unchanged.
    report_eole = eole_experiment.run()
    accuracy_eole = report_eole.synthesis()["accuracy"]["mean"]
    print("EOLE: {:.3f} {:.3f} ({})".format(accuracy_eole[-1], accuracy_eole.max(), accuracy_eole.argmax()))

    report_rf = rf_experiment.run()
    accuracy_rf = report_rf.synthesis()["accuracy"]["mean"]
    # The EOLE line is printed again so both results appear together after
    # the baseline run has finished.
    print("EOLE: {:.3f} {:.3f} ({})".format(accuracy_eole[-1], accuracy_eole.max(), accuracy_eole.argmax()))
    print("RF: {:.3f} {:.3f} ({})".format(accuracy_rf[-1], accuracy_rf.max(), accuracy_rf.argmax()))

    # Compare the last column of each accuracy sample.
    # NOTE(review): presumably one row per run and the last column is the
    # full ensemble -- confirm against Experiment's report type.
    confidence = one_side_test(report_eole.accuracy_sample[:, -1], report_rf.accuracy_sample[:, -1])
    print("P(EOLE > RF) = {:.3f} => {}".format(confidence, confidence > 0.95))
def make_eole(n_experts, base_estimator, centroid_picker, weigher_sampler):
    """Build the EOLE model under evaluation.

    The three strategy objects are bundled into an ``EnsembleTrainer``; the
    resulting ``EOLE`` uses ``n_experts`` experts, a ``MinMaxScaler`` as
    preprocessor, and has ``use_probs`` on and ``use_competences`` off.
    """
    trainer = EnsembleTrainer(
        base_estimator=base_estimator,
        centroid_picker=centroid_picker,
        weigher_sampler=weigher_sampler,
    )
    scaler = preprocessing.MinMaxScaler()
    return EOLE(
        n_experts=n_experts,
        ensemble_trainer=trainer,
        preprocessor=scaler,
        use_probs=True,
        use_competences=False,
    )
def make_random_forest(n_experts, n_inner_experts):
    """Build the random-forest baseline, wrapped in an ``EOLE`` with one expert.

    The single expert is a ``RandomForestClassifier`` with
    ``n_experts * n_inner_experts`` trees, so the baseline has the same total
    number of trees as the EOLE configuration it is compared with. No
    preprocessor is used.
    """
    total_trees = n_experts * n_inner_experts
    forest = RandomForestClassifier(n_estimators=total_trees, max_features="auto")
    picker = RandomCentroidPicker()
    # NOTE(review): precision=0 presumably disables distance-based weighting
    # so the forest trains on the plain data -- confirm in ExponentialWeigher.
    weigher = ExponentialWeigher(precision=0, power=1)
    trainer = EnsembleTrainer(
        base_estimator=forest,
        centroid_picker=picker,
        weigher_sampler=weigher,
    )
    return EOLE(
        n_experts=1,
        ensemble_trainer=trainer,
        preprocessor=None,
        use_probs=True,
        use_competences=False,
    )
def one_side_test(first, second):
    """Return the one-sided confidence that ``first`` has a larger mean than ``second``.

    Runs ``ttest_ind`` with ``equal_var=False`` (Welch's t-test) and converts
    its two-sided p-value ``p`` into a one-sided confidence:

    * statistic >= 0: ``1 - p / 2``
    * statistic < 0:  ``p / 2``

    For a negative statistic the result used to be a hard-coded ``0.0``,
    which misreported the probability. The corrected value never exceeds
    0.5, so any "confidence > 0.95" decision made by callers is unaffected.

    :param first: sample expected to have the larger mean.
    :param second: sample to compare against.
    :return: float in [0, 1]; values near 1 support "first > second".
    """
    statistic, p_two_sided = ttest_ind(first, second, equal_var=False)
    half_p = p_two_sided / 2
    if statistic < 0:
        # first's mean is lower: only the small tail supports "first > second".
        return half_p
    return 1 - half_p
# Script entry point: run the experiment only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()