Commit 2b82fd5
added random_state to nnets, irprop uses all the dataset at this moment (TODO select something more complicated)
arogozhnikov committed Nov 3, 2014
1 parent dbcad63 commit 2b82fd5
Showing 3 changed files with 59 additions and 22 deletions.
2 changes: 1 addition & 1 deletion cmsuml/__init__.py
@@ -1 +1 @@
__author__ = 'axelr'
__author__ = 'Alex Rogozhnikov'
67 changes: 52 additions & 15 deletions cmsuml/nnet.py
@@ -57,16 +57,15 @@ def sgd_trainer(x, y, w, parameters, derivatives, loss,
parameters[name].set_value(val)


def irprop_minus_trainer(x, y, w, parameters, derivatives, loss, stages=100, max_stage_samples=1000,
def irprop_minus_trainer(x, y, w, parameters, derivatives, loss, stages=100,
positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6, random=numpy.random):
""" IRPROP- trainer, see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.3428 """
deltas = dict([(name, 1e-3 * numpy.ones_like(p)) for name, p in parameters.iteritems()])
prev_derivatives = dict([(name, numpy.zeros_like(p)) for name, p in parameters.iteritems()])
xT = x.T
for _ in range(stages):
xTp, yp, wp = get_batch(xT, y, w, batch=max_stage_samples, random=random)
for name in parameters:
new_derivative = derivatives[name](xTp, yp, wp)
new_derivative = derivatives[name](xT, y, w)
old_derivative = prev_derivatives[name]
delta = deltas[name]
delta = numpy.where(new_derivative * old_derivative > 0, delta * positive_step, delta * negative_step)
@@ -78,21 +77,21 @@ def irprop_minus_trainer(x, y, w, parameters, derivatives, loss, stages=100, max
prev_derivatives[name] = new_derivative
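
For orientation, the per-parameter update done inside this loop can be written as a standalone NumPy function; the name and signature below are illustrative, not part of the module:

import numpy

def irprop_minus_step(param, grad_new, grad_prev, delta,
                      positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    # Grow the individual step where the gradient kept its sign, shrink it where the sign flipped.
    delta = numpy.where(grad_new * grad_prev > 0, delta * positive_step, delta * negative_step)
    delta = numpy.clip(delta, min_step, max_step)
    # Move each weight against the sign of its current gradient.
    return param - delta * numpy.sign(grad_new), delta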


def irprop_plus_trainer(x, y, w, parameters, derivatives, loss, stages=100, max_stage_samples=1000,
def irprop_plus_trainer(x, y, w, parameters, derivatives, loss, stages=100,
positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6, random=numpy.random):
"""IRPROP+ trainer, see http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.17.1332"""
deltas = dict([(name, 1e-3 * numpy.ones_like(p)) for name, p in parameters.iteritems()])
prev_derivatives = dict([(name, numpy.zeros_like(p)) for name, p in parameters.iteritems()])
prev_loss_value = 1e10
xT = x.T
for _ in range(stages):
xTp, yp, wp = get_batch(xT, y, w, batch=max_stage_samples, random=random)
loss_value = loss(xTp, yp, wp)
loss_value = loss(xT, y, w)
for name in parameters:
new_derivative = derivatives[name](xTp, yp, wp)
new_derivative = derivatives[name](xT, y, w)
old_derivative = prev_derivatives[name]
val = parameters[name].get_value()
delta = deltas[name]
# TODO this is wrong IRPROP+ implementation
if loss_value > prev_loss_value:
# step back
val += numpy.where(new_derivative * old_derivative < 0, delta * numpy.sign(old_derivative), 0)
@@ -106,6 +105,27 @@ def irprop_plus_trainer(x, y, w, parameters, derivatives, loss, stages=100, max_
prev_loss_value = loss_value
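
The TODO above flags this as an inexact IRPROP+ implementation. For comparison, the canonical iRprop+ rule from the cited paper backtracks a weight only where its gradient changed sign and the loss increased; a standalone sketch with illustrative names, not this module's code:

import numpy

def irprop_plus_step(param, grad_new, grad_prev, delta, prev_step, loss_increased,
                     positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6):
    same_sign = grad_new * grad_prev > 0
    flipped = grad_new * grad_prev < 0
    # Adapt each individual step size from the agreement of consecutive gradient signs.
    delta = numpy.where(same_sign, delta * positive_step,
                        numpy.where(flipped, delta * negative_step, delta))
    delta = numpy.clip(delta, min_step, max_step)
    # Usual move against the gradient sign; where the sign flipped, either undo the
    # previous step (if the loss grew) or stay put ("weight backtracking").
    step = numpy.where(flipped,
                       numpy.where(loss_increased, -prev_step, 0.),
                       -delta * numpy.sign(grad_new))
    # Zero flipped gradients so the next iteration takes the neutral branch for them.
    grad_new = numpy.where(flipped, 0., grad_new)
    return param + step, grad_new, delta, step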


def irprop_extended_trainer(x, y, w, parameters, derivatives, loss, stages=100,
positive_step=1.2, negative_step=0.5, max_step=1., min_step=1e-6, random=numpy.random):
""" Modified version of irprop """
deltas = dict([(name, 1e-3 * numpy.ones_like(p)) for name, p in parameters.iteritems()])
prev_derivatives = dict([(name, numpy.zeros_like(p)) for name, p in parameters.iteritems()])
xT = x.T
for _ in range(stages):
for name in parameters:
new_derivative = derivatives[name](xT, y, w)
old_derivative = prev_derivatives[name]
delta = deltas[name]
delta = numpy.where(new_derivative * old_derivative > 0, delta * positive_step, delta * negative_step)
delta = numpy.clip(delta, min_step, max_step)
deltas[name] = delta
val = parameters[name].get_value()
parameters[name].set_value(val - delta * numpy.sign(new_derivative))
new_derivative[new_derivative * old_derivative < 0] = 0
prev_derivatives[name] = new_derivative



trainers = {'sgd': sgd_trainer, 'irprop-': irprop_minus_trainer, 'irprop+': irprop_plus_trainer}
#endregion
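
A hedged usage sketch of this registry: fit (shown below) accepts a trainer name and forwards extra keyword arguments to the chosen trainer function. The constructor arguments and the data here are assumptions for illustration only:

import numpy
from cmsuml.nnet import SimpleNeuralNetwork  # hypothetical import, following the file layout

X = numpy.random.normal(size=(100, 5)).astype('float32')
y = (X[:, 0] + X[:, 1] > 0).astype(int)

# layers=[inputs, hidden, outputs] and random_state are assumed from the code in this commit.
clf = SimpleNeuralNetwork(layers=[5, 10, 1], random_state=42)
clf.fit(X, y, trainer='irprop-', stages=200)  # 'stages' reaches irprop_minus_trainer via **trainer_parameters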

@@ -213,11 +233,11 @@ def fit(self, X, y, sample_weight=None, trainer=None, **trainer_parameters):
#region Neural networks

class SimpleNeuralNetwork(AbstractNeuralNetworkClassifier):
"""The most simple NN with one hidden layer (sigmoid activation) """
"""The most simple NN with one hidden layer (sigmoid activation), for example purposes """
def prepare(self):
n1, n2, n3 = self.layers
W1 = theano.shared(value=numpy.random.normal(size=[n2, n1]).astype(floatX), name='W1')
W2 = theano.shared(value=numpy.random.normal(size=[n3, n2]).astype(floatX), name='W2')
W1 = theano.shared(value=self.random_state.normal(size=[n2, n1]).astype(floatX), name='W1')
W2 = theano.shared(value=self.random_state.normal(size=[n3, n2]).astype(floatX), name='W2')
self.parameters = {'W1': W1, 'W2': W2}

def activation(input):
@@ -231,7 +251,7 @@ class MultiLayerNetwork(AbstractNeuralNetworkClassifier):
def prepare(self):
activations = [lambda x: x]
for i, layer in list(enumerate(self.layers))[1:]:
W = theano.shared(value=numpy.random.normal(size=[self.layers[i], self.layers[i-1]]), name='W' + str(i))
W = theano.shared(value=self.random_state.normal(size=[self.layers[i], self.layers[i-1]]), name='W' + str(i))
self.parameters[i] = W
# j = i trick is to avoid lambda-capturing of i
activations.append(lambda x, j=i: T.nnet.sigmoid(T.dot(self.parameters[j], activations[j - 1](x))))
@@ -242,8 +262,8 @@ class RBFNeuralNetwork(AbstractNeuralNetworkClassifier):
"""One hidden layer with normalized RBF activation (Radial Basis Function)"""
def prepare(self):
n1, n2, n3 = self.layers
W1 = theano.shared(value=numpy.random.normal(size=[n2, n1]).astype(floatX), name='W1')
W2 = theano.shared(value=numpy.random.normal(size=[n3, n2]).astype(floatX), name='W2')
W1 = theano.shared(value=self.random_state.normal(size=[n2, n1]).astype(floatX), name='W1')
W2 = theano.shared(value=self.random_state.normal(size=[n3, n2]).astype(floatX), name='W2')
G = theano.shared(value=0.1, name='G')
self.parameters = {'W1': W1, 'W2': W2, 'G': G}

@@ -259,14 +279,31 @@ class SoftmaxNeuralNetwork(AbstractNeuralNetworkClassifier):
"""One hidden layer, softmax activation function """
def prepare(self):
n1, n2, n3 = self.layers
W1 = theano.shared(value=numpy.random.normal(size=[n2, n1]).astype(floatX), name='W1')
W2 = theano.shared(value=numpy.random.normal(size=[n3, n2]).astype(floatX), name='W2')
W1 = theano.shared(value=self.random_state.normal(size=[n2, n1]).astype(floatX), name='W1')
W2 = theano.shared(value=self.random_state.normal(size=[n3, n2]).astype(floatX), name='W2')
self.parameters = {'W1': W1, 'W2': W2}

def activation(input):
first = T.nnet.softmax(T.dot(W1, input))
return T.nnet.sigmoid(T.dot(W2, first))
return activation


class PairwiseNeuralNetwork(AbstractNeuralNetworkClassifier):
"""The result is computed as h = sigmoid(Ax), output = sum_{ij} B_ij h_i h_j """
def prepare(self):
n1, n2, n3 = self.layers
W1 = theano.shared(value=self.random_state.normal(size=[n2, n1]).astype(floatX), name='W1')
W2 = theano.shared(value=self.random_state.normal(size=[n2, n2, 1]).astype(floatX), name='W2',
broadcastable=[False, False, True])
self.parameters = {'W1': W1, 'W2': W2}

def activation(input):
first = T.tanh(T.dot(W1, input))
second = first.reshape([n2, 1, input.shape[1]]) * first.reshape([1, n2, input.shape[1]])
return T.nnet.sigmoid(T.sum(W2 * second, axis=[0, 1]))

return activation
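
As a plain NumPy illustration of this activation for a single sample x, taking W2 as an ordinary (n2, n2) matrix (the Theano version keeps a broadcastable trailing axis so it works on batches):

import numpy

def pairwise_output(W1, W2, x):
    h = numpy.tanh(W1.dot(x))             # hidden representation, shape (n2,)
    pairwise = numpy.outer(h, h)          # all products h_i * h_j, shape (n2, n2)
    logit = numpy.sum(W2 * pairwise)      # weighted sum over all pairs
    return 1. / (1. + numpy.exp(-logit))  # sigmoid output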

#endregion

12 changes: 6 additions & 6 deletions cmsuml/utils.py
@@ -1,7 +1,7 @@
# About

# This module contains some helpful and commonly used functions

"""
Module contains some helpful and frequently used functions
"""
from __future__ import division, print_function, absolute_import
import numpy
import pandas
import pylab
@@ -54,7 +54,8 @@ def check_sample_weight(y_true, sample_weight):


def distance_quality_matrix(X, y, n_neighbors=50):
"""On of the ways to measure the quality of knning"""
"""On of the ways to measure the quality of knning: each element
shows how frequently events of class A are met in knn of labels of class B"""
labels = numpy.unique(y)
nn = NearestNeighbors(n_neighbors=n_neighbors)
nn.fit(X)
@@ -63,7 +64,6 @@ def distance_quality_matrix(X, y, n_neighbors=50):
for label1, labels2 in zip(y, numpy.take(y, knn_indices)):
for label2 in labels2:
confusion_matrix[label1, label2] += 1

return confusion_matrix
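
A hedged usage sketch on synthetic data, assuming integer class labels 0..k-1 index the matrix directly (as the loop above suggests); the import path follows the file layout:

import numpy
from cmsuml.utils import distance_quality_matrix  # hypothetical import

X = numpy.random.normal(size=(200, 2))
X[100:] += 3.0                                    # shift half of the points so the two classes separate
y = numpy.array([0] * 100 + [1] * 100)

matrix = distance_quality_matrix(X, y, n_neighbors=10)
# For well-separated classes the row-normalized matrix should be close to the identity.
print(matrix.astype(float) / matrix.sum(axis=1, keepdims=True))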


