-
Notifications
You must be signed in to change notification settings - Fork 1
/
Neural_Network_Histogram.py
111 lines (90 loc) · 3.52 KB
/
Neural_Network_Histogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# load a single file as a numpy array
def load_file(filepath):
dataframe = pd.read_csv(filepath, header=None)
return dataframe.values
# load a dataset group, such as train or test, return them in the form of DataFrame
def load_dataset_group(data_type, group, prefix, filenames):
# create a list for x
x = []
# load input data
for name in filenames:
data = load_file(prefix + data_type + '/' + group + name + '_' + data_type + '.csv')
x.extend(data)
df_x = pd.DataFrame(x)
if data_type == 'Processed_PatientSpace':
df_x.drop(df_x.columns[0:36], axis=1, inplace=True)
# create a list for y
y = []
# load output data
for name in filenames:
data = load_file(prefix + 'Processed_Label/' + group + name + '_label.csv')
y.extend(data)
df_y = pd.DataFrame(y)
return df_x, df_y
# process the feature data using the histogram method
def process_data(feature, dimension, size):
processed_list = []
for index, row in feature.iterrows():
processing_list = []
for n in range(dimension):
if dimension == 36:
processing_list.append(row[n + 36] // size)
else:
processing_list.append(row[n] // size)
processed_list.append(processing_list)
df_feature = pd.DataFrame(processed_list)
return df_feature
# load the dataset, returns train and test x and y elements
def load_dataset(data_type, prefix, train_files, test_files, dimension, size):
# load all train
trainx, trainy = load_dataset_group(data_type, 'train/', prefix, train_files)
print(trainx.shape, trainy.shape)
# process the feature data
trainx = process_data(trainx, dimension, size)
print(trainx.shape, trainy.shape)
# load all test
testx, testy = load_dataset_group(data_type, 'test/', prefix, test_files)
print(testx.shape, testy.shape)
# process the feature data
testx = process_data(testx, dimension, size)
print(testx.shape, testy.shape)
# flatten y
trainy = trainy.values.flatten()
testy = testy.values.flatten()
print(trainx.shape, trainy.shape, testx.shape, testy.shape)
return trainx, trainy, testx, testy
def main():
# User can change the name these training or testing files
train_files = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6']
test_files = ['S7']
# data_type can be 'Processed_PatientSpace' or 'Processed_RawXY'
data_type = 'Processed_RawXY'
# path
path = '/Users/wangyan/Desktop/Research/Experiment_2/'
# User can adjust the number of windows for the histogram
window_num = 40
if data_type == 'Processed_PatientSpace':
dimension = 36
# the max distance is calculated by sqrt(1000^2 + 2000^2)
max_dist = 4472.23595
else:
dimension = 72
max_dist = 4000
# calculate the size of each window
size = max_dist / window_num
# load dataset
trainx, trainy, testx, testy = load_dataset(data_type, path, train_files, test_files, dimension, size)
# create neural network
mlp = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)
# fit the model
mlp.fit(trainx, trainy)
# make predictions
yhat = mlp.predict(testx)
# evaluate predictions and print the results
print(confusion_matrix(testy, yhat))
print(classification_report(testy, yhat))
print(accuracy_score(testy, yhat))
main()