three_l_nn.py
import numpy as np
import numpy.random as nprand
class ThreeLayerNN:
def __init__(self, l1_tot_nodes=0, l2_tot_nodes=0, l3_tot_nodes=0):
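        # nn_structure lists the number of nodes in the input, hidden and output layers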
self.nn_structure = [l1_tot_nodes, l2_tot_nodes, l3_tot_nodes]
def sigmoid(self, input_x):
"""
        The sigmoid function is used here as the node activation function.
"""
return 1 / (1 + np.exp(-input_x))
def sigmoid_derivative(self, input_x):
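        # f'(x) = f(x) * (1 - f(x)) for the sigmoid activation f defined above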
return self.sigmoid(input_x) * (1 - self.sigmoid(input_x))
def init_weights_biases(self):
"""
Step 1.
        Randomly initialise the weights and biases that connect the
        input layer and each hidden layer to the next layer.
"""
weights = {}
biases = {}
for layer in range(1, len(self.nn_structure)):
weights[layer] = nprand.random_sample((self.nn_structure[layer], self.nn_structure[layer-1]))
biases[layer] = nprand.random_sample((self.nn_structure[layer],))
return weights, biases
def reset_cost_partial_derivs_sum(self):
"""
Step 2.
At each iteration until the exit condition is reached,
set the cumulative sum of all partial derivatives
of the cost function, to zero
for the input layer and all hidden layer's weights and biases
"""
cost_weights_partial_derivs_sums = {}
cost_biases_partial_derivs_sums = {}
for layer in range(1, len(self.nn_structure)):
cost_weights_partial_derivs_sums[layer] = np.zeros((self.nn_structure[layer], self.nn_structure[layer-1]))
cost_biases_partial_derivs_sums[layer] = np.zeros((self.nn_structure[layer],))
return cost_weights_partial_derivs_sums, cost_biases_partial_derivs_sums
def feed_forward(self, inputs_x, weights, biases):
"""
Step 3.
For each sample,
perform a feed-forward pass through all layers
"""
        # the input to the first-layer nodes is the feature vector (Xs)
        # of the current input sample
layer_to_activation_funct_outputs = {1: inputs_x}
z = {}
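        # each pass computes z^(l+1) = W^(l) . h^(l) + b^(l) and
        # h^(l+1) = f(z^(l+1)), with f the sigmoid activation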
for layer in range(1, len(weights)+1):
if layer == 1:
nodes_input = inputs_x
else:
nodes_input = layer_to_activation_funct_outputs[layer]
z[layer+1] = weights[layer].dot(nodes_input) + biases[layer]
layer_to_activation_funct_outputs[layer+1] = self.sigmoid(z[layer+1])
return layer_to_activation_funct_outputs, z
def output_layer_cost_gradient(self, expected_outputs, actual_activation_outputs, actual_outputs):
"""
Step 4.
At the end of the feed-forward pass on the output layer,
if the minimum of the cost function curve has not been reached yet
then Calculate the gradient (slope) of the cost function
in order to later decide how to better adjust weights and biases.
"""
# delta^(nl) = -(y_i - h_i^(nl)) * f'(z_i^(nl))
return -(expected_outputs - actual_activation_outputs) * self.sigmoid_derivative(actual_outputs)
def hidden_layers_cost_gradient(self, gradient_at_layer_plus_1, weights_at_layer, z_at_layer):
"""
Step 5.
Use back-propagation to calculate the gradient of the cost function
for all hidden layers
"""
return np.dot(np.transpose(weights_at_layer), gradient_at_layer_plus_1) * self.sigmoid_derivative(z_at_layer)
def convert_single_to_multi_output(self, expected_class):
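        # one-hot encoding of the class label, e.g. with 4 output nodes
        # expected_class = 2 becomes [0, 0, 1, 0]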
expected_outputs = []
for ind in range(0, self.nn_structure[2]):
if ind == expected_class:
expected_outputs.append(1)
else:
expected_outputs.append(0)
return expected_outputs
def convert_multi_to_single_output(self, multi_output):
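        # return the index of the first non-zero entry, or -1 if all entries are zero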
for ind in range(0, len(multi_output)):
if multi_output[ind] != 0:
return ind
return -1
    def train_nn(self, inputs_x, outputs_y, weights, biases, max_iterations=3000, step=0.25):
# for each sample, iterate feed-fwd and back-propagation to adjust the weights and biases
avg_cost_curve = []
for sample_index in range(0, len(inputs_x)):
            print('Training NN with sample #{}'.format(sample_index))
expected_outputs = self.convert_single_to_multi_output(outputs_y[sample_index])
inputs_x_curr = inputs_x[sample_index]
iteration = 0
tot_classes_to_predict = self.nn_structure[2]
output_layer = len(self.nn_structure)
            print('Starting gradient descent for {} iterations'.format(max_iterations))
# at each iteration until the limit is reached
while iteration < max_iterations:
if iteration % 1000 == 0:
print('Iteration {} of {}'.format(iteration, max_iterations))
# 2) reset the cumulative sum of partial derivatives for weights and for biases to zero
cost_weights_partial_derivs_sums, cost_biases_partial_derivs_sums = self.reset_cost_partial_derivs_sum()
avg_cost = 0
                # for each class in the expected (one-hot) output of the current sample
for predicted_class_index in range(len(expected_outputs)):
cost_gradients = {}
# 3) feed-fwd through all layers
layer_to_activation_funct_output, z = self.feed_forward(inputs_x_curr, weights, biases)
# 4) 5) back-propagation: calculate cost, its gradient and consequent adjustments to weights and biases
for layer in range(output_layer, 0, -1):
if layer == output_layer:
cost_gradients[layer] = self.output_layer_cost_gradient(expected_outputs[predicted_class_index], layer_to_activation_funct_output[layer], z[layer])
avg_cost += np.linalg.norm((expected_outputs[predicted_class_index] - layer_to_activation_funct_output[layer]))
else:
if layer > 1:
cost_gradients[layer] = self.hidden_layers_cost_gradient(cost_gradients[layer+1], weights[layer], z[layer])
cost_weights_partial_derivs_sums[layer] += np.dot(cost_gradients[layer+1][:, np.newaxis], np.transpose(layer_to_activation_funct_output[layer][:, np.newaxis]))
cost_biases_partial_derivs_sums[layer] += cost_gradients[layer+1]
# 6) adjust weights and biases accordingly (i.e., gradient descent step)
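                # W^(l) = W^(l) - step * (1/m) * sum of dC/dW^(l), and likewise for b^(l),
                # where m = tot_classes_to_predict is the number of terms accumulated above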
                for layer in range(output_layer - 1, 0, -1):
weights[layer] += -step * (1.0/tot_classes_to_predict * cost_weights_partial_derivs_sums[layer])
biases[layer] += -step * (1.0/tot_classes_to_predict * cost_biases_partial_derivs_sums[layer])
# 7) complete the average cost calculation
# the cost for this iteration is
# the sum of all the costs at the output layer
# for each predicted class
# over the total amount of predictable classes (i.e., avg)
avg_cost = 1.0/tot_classes_to_predict * avg_cost
# the avg cost for this iteration is a point in the curve
# which we want to be as close to the min of that curve
# as possible
avg_cost_curve.append(avg_cost)
iteration += 1
return weights, biases, avg_cost_curve
def predict_classes(self, weights, biases, inputs_x_curr):
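        # for each candidate class, run a feed-forward pass on the sample and record
        # the index of the strongest output-layer activation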
tot_classes_to_predict = self.nn_structure[2]
classes = np.zeros((tot_classes_to_predict,))
for i in range(tot_classes_to_predict):
h, z = self.feed_forward(inputs_x_curr, weights, biases)
            print(h[3])  # output-layer activations for this sample
            classes[i] = np.argmax(h[len(self.nn_structure)])
            print('classes[{}] = {}'.format(i, classes[i]))
        print('predicted classes:')
        print(classes)
return classes
def test_nn(self, weights, biases, x_test_set):
prediction = []
for sample_index in range(0, len(x_test_set)):
            print('Testing NN with sample #{}'.format(sample_index))
sample_prediction = self.predict_classes(weights, biases, x_test_set[sample_index])
single_output = self.convert_multi_to_single_output(sample_prediction)
            print('single output: {}'.format(single_output))
prediction.append(single_output)
return prediction
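# ---------------------------------------------------------------------------
# Minimal usage sketch: wires the steps above together on a tiny synthetic
# two-class dataset built with numpy only. The node counts, sample values,
# iteration budget and step size below are illustrative assumptions, not
# values prescribed by ThreeLayerNN itself.
if __name__ == '__main__':
    nprand.seed(0)
    # four samples with two features each, labelled with classes 0 and 1
    x_data = np.array([[0.1, 0.9], [0.2, 0.8], [0.9, 0.1], [0.8, 0.2]])
    y_data = np.array([0, 0, 1, 1])
    # 2 input nodes, 3 hidden nodes, 2 output nodes (one per class)
    nn = ThreeLayerNN(l1_tot_nodes=2, l2_tot_nodes=3, l3_tot_nodes=2)
    weights, biases = nn.init_weights_biases()
    weights, biases, avg_cost_curve = nn.train_nn(
        x_data, y_data, weights, biases, max_iterations=1000, step=0.25)
    predictions = nn.test_nn(weights, biases, x_data)
    print('predicted class indices: {}'.format(predictions))
    print('final average cost: {}'.format(avg_cost_curve[-1]))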