// Neural Network - The Hard Way
// Wei Chen
// 2018-10-02
// Ported from Google's NN-Playground in TypeScript to Scala (merging Playground, State, and NN)
package com.scalaml.algorithm

/**
 * A node in a neural network. Each node has a state
 * (total input, output, and their respective derivatives) which changes
 * after every forward- and back-propagation run.
 */
class Node(
    /**
     * Creates a new node with the provided id and activation function.
     */
    val id: String = null,
    /** Activation function that takes the total input and returns the node's output. */
    val activation: ActivationFunction = null,
    val initZero: Boolean = false
) {
    /** List of input links. */
    var inputLinks = Array[Link]()
    var bias: Double = if(initZero) 1e-8 else 0.1
    /** List of output links. */
    var outputLinks = Array[Link]()
    /** Node input and output. */
    var totalInput: Double = 0.0
    var output: Double = 0.0
    /** Error derivative with respect to this node's output. */
    var outputDer: Double = 0.0
    var rawOutputDer: Double = 0.0
    /** Error derivative with respect to this node's total input. */
    var inputDer: Double = 0.0
    /**
     * Accumulated error derivative with respect to this node's total input since
     * the last update. This derivative equals dE/db where b is the node's
     * bias term.
     */
    var accInputDer: Double = 0.0
    /**
     * Number of accumulated error derivatives with respect to the total input
     * since the last update.
     */
    var numAccumulatedDers: Double = 0.0
    /** Recomputes the node's output and returns it. */
    def updateOutput(): Double = {
        // Store the total input into the node.
        totalInput = bias + inputLinks.map(link => link.weight * link.source.output).sum
        output = activation.output(totalInput)
        output
    }
}
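
// A minimal sketch (hypothetical, not part of the original file): wiring two
// nodes together with a Link by hand and recomputing the downstream output.
// The ids and the input value are illustrative assumptions only.
object NodeExample {
    def demo(): Double = {
        val in = new Node("in", LINEAR)
        val out = new Node("out", SIGMOID)
        val link = new Link(in, out) // random initial weight in [-0.5, 0.5)
        in.outputLinks :+= link
        out.inputLinks :+= link
        in.output = 0.7
        out.updateOutput() // SIGMOID.output(out.bias + link.weight * 0.7)
    }
}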

/** Built-in error functions */
trait ErrorFunction {
    def error(output: Double, target: Double): Double
    def der(output: Double, target: Double): Double
}

object SQUARE extends ErrorFunction { // Square error only
    override def error(output: Double, target: Double): Double =
        0.5 * Math.pow(output - target, 2)
    override def der(output: Double, target: Double): Double =
        output - target
}

/** Built-in activation functions */
trait ActivationFunction {
    def output(x: Double): Double
    def der(x: Double): Double
}

object TANH extends ActivationFunction {
    override def output(x: Double): Double = Math.tanh(x)
    override def der(x: Double): Double = 1 - Math.pow(output(x), 2)
}

object SIGMOID extends ActivationFunction {
    override def output(x: Double): Double = 1 / (1 + Math.exp(-x))
    override def der(x: Double): Double = {
        val out = output(x)
        out * (1 - out)
    }
}

object RELU extends ActivationFunction {
    override def output(x: Double): Double = Math.max(0, x)
    override def der(x: Double): Double = if(x <= 0) 0 else 1
}

object LINEAR extends ActivationFunction {
    override def output(x: Double): Double = x
    override def der(x: Double): Double = 1
}
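
// A minimal sketch (not part of the original file): checking an activation's
// analytic der against a central finite difference. The sample points and
// step size are illustrative assumptions.
object ActivationCheckExample {
    def maxDerError(f: ActivationFunction, xs: Seq[Double], h: Double = 1e-6): Double =
        xs.map { x =>
            val numeric = (f.output(x + h) - f.output(x - h)) / (2 * h)
            Math.abs(numeric - f.der(x))
        }.max
    // e.g. maxDerError(TANH, Seq(-2.0, -1.0, -0.5, 0.5, 1.0, 2.0))
    // should be on the order of 1e-9 or smaller.
}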

/** Built-in regularization functions */
trait RegularizationFunction {
    def output(w: Double): Double
    def der(w: Double): Double
}

object L1 extends RegularizationFunction {
    override def output(w: Double): Double = Math.abs(w)
    override def der(w: Double): Double = if(w < 0) -1 else if(w > 0) 1 else 0
}

object L2 extends RegularizationFunction {
    override def output(w: Double): Double = 0.5 * Math.pow(w, 2)
    override def der(w: Double): Double = w
}

/**
 * A link in a neural network. Each link has a weight and a source and
 * destination node. It also has an internal state (the error derivative
 * with respect to a particular input) which gets updated after
 * a run of back propagation.
 */
class Link(
    /**
     * Constructs a link in the neural network, initialized with a random weight.
     *
     * @param source The source node.
     * @param dest The destination node.
     * @param regularization The regularization function that computes the
     *     penalty for this weight. If null, there will be no regularization.
     */
    val source: Node,
    val dest: Node,
    val regularization: RegularizationFunction = null,
    val initZero: Boolean = false
) {
    val id: String = source.id + "-" + dest.id
    var weight: Double = if(initZero) 1e-8 else Math.random() - 0.5
    var isDead: Boolean = false
    /** Error derivative with respect to this weight. */
    var errorDer: Double = 0.0
    /** Accumulated error derivative since the last update. */
    var accErrorDer: Double = 0.0
    /** Number of accumulated derivatives since the last update. */
    var numAccumulatedDers: Double = 0.0
}

/**
 * A wrapper for all neural network functions.
 */
class NeuralNetwork {
    var networkShape: Array[Int] = Array(4, 2)
    var activation: ActivationFunction = TANH
    var outputActivation: ActivationFunction = LINEAR
    var regularization: RegularizationFunction = null
    var inputIds: Array[String] = Array[String]()
    var initZero: Boolean = false // initialize weights and biases near 0.0, for deterministic testing
    var index: Int = 0
    var updateIndex: Int = 0
    var batchSize: Int = 10
    var learningRate: Double = 0.03
    var regularizationRate: Double = 0.01
    var network = Array[Array[Node]]()
    var gradientClipping: Boolean = false
    /**
     * Builds a neural network.
     *
     * @param networkShape The shape of the network. E.g. [1, 2, 3, 1] means
     *     the network will have one input node, 2 nodes in the first hidden layer,
     *     3 nodes in the second hidden layer, and 1 output node.
     * @param activation The activation function of every hidden node.
     * @param outputActivation The activation function for the output nodes.
     * @param regularization The regularization function that computes a penalty
     *     for a given weight (parameter) in the network. If null, there will be
     *     no regularization.
     * @param inputIds List of ids for the input nodes.
     */
    def config(
        _networkShape: Array[Int] = networkShape,
        _activation: ActivationFunction = activation,
        _outputActivation: ActivationFunction = outputActivation,
        _regularization: RegularizationFunction = regularization,
        _inputIds: Array[String] = inputIds,
        _initZero: Boolean = initZero,
        _index: Int = index,
        _updateIndex: Int = updateIndex,
        _batchSize: Int = batchSize,
        _learningRate: Double = learningRate,
        _regularizationRate: Double = regularizationRate,
        _gradientClipping: Boolean = gradientClipping
    ): Boolean = {
        try {
            /** Parameters */
            networkShape = _networkShape
            activation = _activation
            outputActivation = _outputActivation
            regularization = _regularization
            inputIds = _inputIds
            initZero = _initZero
            index = _index
            updateIndex = _updateIndex
            batchSize = _batchSize
            learningRate = _learningRate
            regularizationRate = _regularizationRate
            gradientClipping = _gradientClipping
            /** Network */
            val numLayers = networkShape.length
            var id = 1
            /** List of layers, with each layer being a list of nodes. */
            network = Array[Array[Node]]()
            for(layerIdx <- 0 until numLayers) {
                val isOutputLayer: Boolean = layerIdx == (numLayers - 1)
                val numNodes = networkShape(layerIdx)
                val currentLayer = (for(i <- 0 until numNodes) yield {
                    var nodeId = id.toString()
                    if(layerIdx == 0 && inputIds.size == numNodes) {
                        nodeId = inputIds(i)
                    } else {
                        id += 1
                    }
                    val node = new Node(
                        nodeId,
                        if(isOutputLayer) outputActivation else activation,
                        initZero
                    )
                    if(layerIdx >= 1) {
                        // Add links from nodes in the previous layer to this node.
                        network(layerIdx - 1).foreach { prevNode =>
                            val link = new Link(prevNode, node, regularization, initZero)
                            prevNode.outputLinks :+= link
                            node.inputLinks :+= link
                        }
                    }
                    node
                }).toArray
                network :+= currentLayer
            }
            true
        } catch { case e: Exception =>
            Console.err.println(e)
            false
        }
    }
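    // Usage sketch (illustrative, not from the original file): build a 2-4-1
    // network with sigmoid hidden nodes and a linear output. The shape and
    // ids below are assumptions for demonstration only.
    //   val nn = new NeuralNetwork
    //   nn.config(
    //       _networkShape = Array(2, 4, 1),
    //       _activation = SIGMOID,
    //       _outputActivation = LINEAR,
    //       _inputIds = Array("x1", "x2") // must match the input-layer size to take effect
    //   )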
    /**
     * Runs a forward propagation of the provided input through the
     * network. This method modifies the internal state of the network - the
     * total input and output of each node in the network.
     *
     * @param inputs The input array. Its length should match the number of input
     *     nodes in the network.
     * @return The final output of the network.
     */
    def forwardProp(
        inputs: Array[Double]
    ): Array[Double] = {
        val inputLayer = network.head
        // Update the input layer.
        for(i <- 0 until inputLayer.length) {
            val node = inputLayer(i)
            node.output = inputs(i)
        }
        network.drop(1).foreach { currentLayer =>
            // Update all the nodes in this layer.
            for(node <- currentLayer) node.updateOutput()
        }
        getOutputNodes.map(_.output)
    }
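    // Illustrative trace (assumed values, not from the original): for a 1-1
    // network whose single non-input node has bias 0.1, one incoming weight
    // 0.5, and TANH activation, forwardProp(Array(2.0)) sets
    // totalInput = 0.1 + 0.5 * 2.0 = 1.1 and output = tanh(1.1) ≈ 0.8005.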
    /**
     * Runs a backward propagation using the provided targets and the
     * output computed by the previous call to forward propagation.
     * This method modifies the internal state of the network - the error
     * derivatives with respect to each node, and each weight
     * in the network.
     */
    def backProp(
        targets: Array[Double],
        errorFunc: ErrorFunction = SQUARE,
        _outputWeights: Array[Double] = Array.fill[Double](networkShape.last)(1.0)
    ): Unit = {
        // The output nodes are a special case. We use the user-defined error
        // function for the derivative.
        for((node, target) <- getOutputNodes.zip(targets)) {
            node.rawOutputDer = errorFunc.der(node.output, target)
        }
        for((node, weight) <- getOutputNodes.zip(_outputWeights)) {
            node.outputDer = node.rawOutputDer * weight
        }
        // Go through the layers backwards.
        for(layerIdx <- network.length - 1 to 1 by -1) {
            val currentLayer = network(layerIdx)
            // Compute the error derivative of each node with respect to:
            // 1) its total input
            // 2) each of its input weights.
            for(node <- currentLayer) {
                node.inputDer = node.outputDer * node.activation.der(node.totalInput)
                node.accInputDer += node.inputDer
                node.numAccumulatedDers += 1
            }
            // Error derivative with respect to each weight coming into the node.
            for(node <- currentLayer) {
                for(link <- node.inputLinks) {
                    if(!link.isDead) {
                        link.errorDer = node.inputDer * link.source.output
                        if(gradientClipping) {
                            link.errorDer = Math.max(-1, Math.min(1, link.errorDer))
                        }
                        link.accErrorDer += link.errorDer
                        link.numAccumulatedDers += 1
                    }
                }
            }
            if(layerIdx > 1) {
                val prevLayer = network(layerIdx - 1)
                for(node <- prevLayer) {
                    // Compute the error derivative with respect to each node's output.
                    node.outputDer = 0.0
                    for(output <- node.outputLinks) {
                        node.outputDer += output.weight * output.dest.inputDer
                    }
                }
            }
        }
    }
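    // Sketch of the chain rule applied above, for a node with activation f:
    //   inputDer      = outputDer * f'(totalInput)     // dE/d(totalInput)
    //   link.errorDer = inputDer * link.source.output  // dE/dw per input link
    // and each node in the previous layer then accumulates
    //   outputDer = sum over its outputLinks of (weight * dest.inputDer).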
    /**
     * Updates the weights of the network using the previously accumulated error
     * derivatives.
     */
    def updateWeights(): Unit = {
        for(currentLayer <- network.drop(1); node <- currentLayer) {
            // Update the node's bias.
            if(node.numAccumulatedDers > 0) {
                node.bias -= learningRate * node.accInputDer / node.numAccumulatedDers
                node.accInputDer = 0.0
                node.numAccumulatedDers = 0.0
            }
            // Update the weights coming into this node.
            for(link <- node.inputLinks) {
                if(!link.isDead) {
                    if(link.numAccumulatedDers > 0) {
                        // Update the weight based on dE/dw.
                        link.weight = link.weight -
                            (learningRate / link.numAccumulatedDers) * link.accErrorDer
                        // Further update the weight based on regularization.
                        val regulDer = if(link.regularization != null) link.regularization.der(link.weight) else 0.0
                        val newLinkWeight = link.weight - (learningRate * regularizationRate) * regulDer
                        // der(2) == 1 identifies L1 regularization (L2 would give 2):
                        // under L1, a weight that crosses zero is clamped to zero
                        // and the link is marked dead.
                        if(link.regularization != null && link.regularization.der(2) == 1 &&
                            link.weight * newLinkWeight < 0) {
                            // The weight crossed 0 due to the regularization term. Set it to 0.
                            // Console.err.println(" --------------- YOU GOT A DEAD CELL -------------- ")
                            link.weight = 0.0
                            link.isDead = true
                        } else {
                            link.weight = newLinkWeight
                        }
                        link.accErrorDer = 0.0
                        link.numAccumulatedDers = 0.0
                    }
                }
            }
        }
    }
    /** Returns the output nodes of the network. */
    def getOutputNodes: Array[Node] =
        network.last
    /** Resets the network parameters and the iteration pointer. */
    def reset(onStartup: Boolean = false): Boolean = {
        networkShape = Array(4, 2)
        activation = TANH
        outputActivation = LINEAR
        regularization = null
        inputIds = Array[String]()
        initZero = false
        index = 0
        updateIndex = 0
        batchSize = 10
        learningRate = 0.03
        regularizationRate = 0.01 // match the field's default
        config()
    }
    /** Clears the current network. */
    def clear() = reset(false)
    /** Trains on a single input/target pair; moved and modified from Playground. */
    def trainOne(
        inputs: Array[Double], targets: Array[Double],
        errorFunc: ErrorFunction = SQUARE,
        _outputWeights: Array[Double] = Array.fill[Double](networkShape.last)(1.0)
    ): Unit = {
        forwardProp(inputs)
        backProp(targets, errorFunc, _outputWeights)
        // Apply the accumulated derivatives once every batchSize samples.
        if((index - updateIndex + 1) % batchSize == 0) {
            updateIndex = index
            updateWeights()
        }
        index += 1
    }
    /** Predicts a single input. */
    def predictOne = forwardProp _
    /** Trains on all data. */
    def train(
        x: Array[Array[Double]], y: Array[Array[Double]],
        errorFunc: ErrorFunction = SQUARE,
        iter: Int = 1,
        _learningRate: Double = learningRate,
        _outputWeights: Array[Double] = Array.fill[Double](networkShape.last)(1.0)
    ): Boolean = {
        learningRate = _learningRate
        val data = x.zip(y)
        for(i <- 0 until iter) data.foreach { case (inputs, targets) =>
            trainOne(inputs, targets, errorFunc, _outputWeights)
        }
        true
    }
    /** Predicts all data. */
    def predict(data: Array[Array[Double]]): Array[Array[Double]] =
        data.map(inputs => predictOne(inputs))
}
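
// --- Usage sketch (not part of the original file) ---
// A minimal, hypothetical example: fitting XOR with a 2-4-1 network using the
// API above. The hyperparameters (shape, batch size, learning rate, iteration
// count) are illustrative assumptions, not values prescribed by the author.
object NeuralNetworkExample extends App {
    val nn = new NeuralNetwork
    nn.config(
        _networkShape = Array(2, 4, 1), // 2 inputs, 4 hidden nodes, 1 output
        _activation = TANH,
        _outputActivation = LINEAR,
        _batchSize = 4,                 // update once per pass over the 4 samples
        _learningRate = 0.1
    )
    val x = Array(Array(0.0, 0.0), Array(0.0, 1.0), Array(1.0, 0.0), Array(1.0, 1.0))
    val y = Array(Array(0.0), Array(1.0), Array(1.0), Array(0.0))
    nn.train(x, y, iter = 2000)
    nn.predict(x).zip(y).foreach { case (pred, target) =>
        println(f"predicted ${pred.head}%.3f, target ${target.head}")
    }
}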