Rationalized the data format in mnist_loader, and minor improvements and simplifications for network_basic
mnielsen committed Jul 19, 2013
1 parent 372a869 commit 0edd84e
Showing 2 changed files with 50 additions and 62 deletions.
code/mnist_loader.py: 38 additions & 36 deletions
@@ -3,10 +3,9 @@
~~~~~~~~~~~~
A library to load the MNIST image data. For details of the data
structures that are returned, see the doc string for ``load_data``.
The library also contains a helper method ``load_data_nn`` which
returns the data in a format well adapted for use with our neural
network code.
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
Note that the code requires the file ``../data/mnist.pkl``. This is not
included in the repository. It may be downloaded from:
@@ -40,46 +39,49 @@ def load_data():
The ``validation_data`` and ``test_data`` are similar, except
each contains only 10,000 images.
Note that the format the data is returned in is well adapted for
use by scikit-learn's SVM method, but not so well adapted for our
neural network code. For that, see the wrapper function
``load_data_nn``.
This is a nice and convenient data format, but for use in neural
networks it's actually helpful to modify the format of the
``training_data`` a little. That's done in the wrapper function
``load_data_wrapper()``, see below.
"""
f = open('../data/mnist.pkl', 'rb')
training_data, validation_data, test_data = cPickle.load(f)
f.close()
return (training_data, validation_data, test_data)
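For orientation, here is a minimal sketch of how the tuples returned by ``load_data`` might be inspected. The shapes in the comments assume the standard MNIST pickle layout (50,000 training images of 784 pixels each, with matching digit labels); that layout is described in the collapsed part of the docstring rather than in this hunk.

import mnist_loader

# Assumption: each of the three returned tuples is (images, labels), with
# ``images`` a numpy.ndarray of shape (num_examples, 784) and ``labels`` a
# numpy.ndarray of digit values 0..9.
training_data, validation_data, test_data = mnist_loader.load_data()
images, labels = training_data
print("training images: {0}, labels: {1}".format(images.shape, labels.shape))
# expected, under the assumption above: (50000, 784) and (50000,)
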

def load_data_nn():
"""Return a tuple containing ``(training_data, test_inputs,
actual_test_results)`` from the MNIST data. The tuples are in a
format optimized for use by our neural network code. This
function makes use of ``load_data()``, but does some additional
processing to put the data in the right format.
``training_data`` is a list containing 50,000 2-tuples ``(x, y)``.
``x`` is a 784-dimensional numpy.ndarray containing the input
image. ``y`` is a 10-dimensional numpy.ndarray representing the
unit vector corresponding to the correct digit for ``x``.
``test_inputs`` is a list containing 10,000 x 784-dimensional
numpy.ndarray objects, representing test images.
``actual_test_results`` is a list containing the 10,000 digit
values (integers) corresponding to the ``test_inputs``.
Obviously, we're using slightly different formats for the training
and test data. These formats turn out to be the most convenient
for use in our neural network code."""
training_data, validation_data, test_data = load_data()
inputs = [np.reshape(x, (784, 1)) for x in training_data[0]]
results = [vectorized_result(y) for y in training_data[1]]
training_data = zip(inputs, results)
test_inputs = [np.reshape(x, (784, 1)) for x in test_data[0]]
return (training_data, test_inputs, test_data[1])
def load_data_wrapper():
"""Return a tuple containing ``(training_data, validation_data,
test_data)``. Based on ``load_data``, but the format is a little more
convenient for use in neural networks.
In particular, ``training_data`` is a list containing 50,000
2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
containing the input image. ``y`` is a 10-dimensional
numpy.ndarray representing the unit vector corresponding to the
correct digit for ``x``.
``validation_data`` and ``test_data`` are lists containing 10,000
2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
numpy.ndarray containing the input image, and ``y`` is the
corresponding classification, i.e., the digit values (integers)
corresponding to ``x``.
Obviously, this means we're using slightly different formats for
the training data and the validation / test data. These formats
turn out to be the most convenient for use in our neural network
code."""
tr_d, va_d, te_d = load_data()
training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
training_results = [vectorized_result(y) for y in tr_d[1]]
training_data = zip(training_inputs, training_results)
validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
validation_data = zip(validation_inputs, va_d[1])
test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
test_data = zip(test_inputs, te_d[1])
return (training_data, validation_data, test_data)
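A short sketch of what the wrapper's output looks like per example, following the formats described in the docstring above. Variable names here are illustrative, and the exact shape of the label vector is assumed to match the (784, 1) reshaped inputs.

import mnist_loader

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
x, y = training_data[0]       # first training pair
print(x.shape)                # (784, 1): a column vector of pixel intensities
print(y.shape)                # assumed (10, 1): unit vector encoding the digit
vx, vy = validation_data[0]   # validation/test pairs keep the raw digit label
print(vx.shape)               # (784, 1)
print(vy)                     # an integer in 0..9
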

def vectorized_result(j):
""" Return a 10-dimensional unit vector with a 1.0 in the jth
"""Return a 10-dimensional unit vector with a 1.0 in the jth
position and zeroes elsewhere. This is used to convert a digit
(0...9) into a corresponding desired output from the neural
network."""
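The body of ``vectorized_result`` is collapsed in this diff. A plausible implementation matching the docstring (a sketch, not necessarily the exact code in the file) is:

import numpy as np  # presumably already imported at the top of mnist_loader.py

def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere."""
    e = np.zeros((10, 1))  # column vector, matching the (784, 1) inputs
    e[j] = 1.0
    return e
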
code/network_basic.py: 12 additions & 26 deletions
@@ -42,7 +42,7 @@ def feedforward(self, a):
return a

def SGD(self, training_data, epochs, mini_batch_size, eta,
lmbda, test=False, test_data=None)
lmbda, test=False, test_data=None):
"""Train the neural network using mini-batch stochastic
gradient descent. The ``training_data`` is a list of tuples
``(x, y)`` representing the training inputs and the desired
@@ -64,16 +64,16 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
self.backprop(mini_batch, n, eta, lmbda)
if test:
print "Epoch {}: {} / {}".format(
j, self.evaluate(test_inputs, actual_test_results), n_test)
j, self.evaluate(test_data), n_test)
else:
print "Epoch %s complete" % j

def backprop(self, training_data, T, eta, lmbda):
def backprop(self, training_data, n, eta, lmbda):
"""Update the network's weights and biases by applying a
single iteration of gradient descent using backpropagation.
The ``training_data`` is a list of tuples ``(x, y)``. It need
not include the entire training data set --- it might be a
mini-batch, or even a single training example. ``T`` is the
mini-batch, or even a single training example. ``n`` is the
size of the total training set (which may not be the same as
the size of ``training_data``). The other parameters are
self-explanatory."""
@@ -108,19 +108,18 @@ def backprop(self, training_data, T, eta, lmbda):
nabla_b[-l] += delta
nabla_w[-l] += np.dot(delta, activations[-l-1].transpose())
# Add the regularization terms to the gradient for the weights
nabla_w = [nw+(lmbda*B/T)*w for nw, w in zip(nabla_w, self.weights)]
nabla_w = [nw+(lmbda*B/n)*w for nw, w in zip(nabla_w, self.weights)]
self.weights = [w-eta*nw for w, nw in zip(self.weights, nabla_w)]
self.biases = [b-eta*nb for b, nb in zip(self.biases, nabla_b)]
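For reference, the regularization term added to ``nabla_w`` just above follows the usual L2 weight-decay pattern: if the cost is the base cost plus (lmbda/(2n)) times the sum of squared weights, each weight gradient picks up an extra (lmbda/n)*w. A minimal standalone sketch of such an update follows; the factor ``B`` appearing in the line being replaced is not defined in this hunk, so the scaling below uses the standard convention rather than this file's exact one.

def regularized_step(weights, nabla_w, eta, lmbda, n):
    # Gradient descent with L2 weight decay: the penalty
    # (lmbda/(2*n)) * sum of squared weights contributes (lmbda/n)*w
    # to the gradient of each weight matrix.
    return [w - eta*(nw + (lmbda/float(n))*w)
            for w, nw in zip(weights, nabla_w)]
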

def evaluate(self, test_inputs, actual_test_results):
"""Return the number of ``test_inputs`` for which the neural
network outputs the correct result, i.e., the same result as
given in ``actual_test_results``. Note that the neural
def evaluate(self, test_data):
"""Return the number of test inputs for which the neural
network outputs the correct result. Note that the neural
network's output is assumed to be the index of whichever
neuron in the final layer has the highest activation."""
test_results = [np.argmax(self.feedforward(x)) for x in test_inputs]
test_results = [np.argmax(self.feedforward(x)) for x in test_data[0]]
return sum(int(x == y)
for x, y in zip(test_results, actual_test_results))
for x, y in zip(test_results, test_data[1]))

def cost(self, x, y):
"""Return the quadratic cost associated to the network, with
@@ -135,23 +134,10 @@ def cost_derivative(self, output_activations, y):
between the output activations and the desired output, ``y``."""
return (output_activations-y)

def evaluate_training_results(self, training_data):
"""Return the number of elements of the ``training_data`` that
are correctly classified."""
training_results = [np.argmax(self.feedforward(x[0])) for x in
training_data]
actual_training_results = [np.argmax(x[1]) for x in training_data]
return sum(int(x == y)
for x, y in zip(training_results, actual_training_results))

#### Miscellaneous functions
def sigmoid(z):
"""The sigmoid function. Note that it checks to see whether ``z``
is very negative, to avoid overflow errors in the exponential
function. No corresponding test of ``z`` being very positive is
necessary --- ordinary Python arithmetic deals just fine with that
case."""
return 0.0 if z < -700 else 1.0/(1.0+np.exp(-z))
"""The sigmoid function."""
return 1.0/(1.0+np.exp(-z))

sigmoid_vec = np.vectorize(sigmoid)
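Putting the two files together, a typical training run with the new data format might look like the following. This is a sketch only: the ``Network`` constructor argument (a list of layer sizes) is assumed from context rather than shown in this diff, and the hyperparameter values are illustrative.

import mnist_loader
import network_basic

# Data in the (x, y) tuple format produced by load_data_wrapper.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# A 784-30-10 network: 784 input pixels, one hidden layer of 30 neurons,
# and 10 output neurons (one per digit).  Layer sizes are illustrative.
net = network_basic.Network([784, 30, 10])

# 30 epochs, mini-batches of size 10, learning rate eta=0.5, and
# regularization parameter lmbda=5.0, following the positional order of
# the SGD signature shown in the diff above.
net.SGD(training_data, 30, 10, 0.5, 5.0)

Per-epoch accuracy reporting can be switched on by also passing ``test=True`` and a ``test_data`` argument in whatever format ``evaluate`` expects (per the indexing in ``evaluate``, a pair of input and label sequences).
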

