Update the undocumented graphical python examples #4989

Merged Apr 11, 2020 (25 commits).

Commits:
c5afe1f  Fix k-d tree link. (vinnik-dmitry07, Apr 1, 2020)
35ab782  Fix formula in KernelDensity description. (vinnik-dmitry07, Apr 2, 2020)
a135005  Remove RealFeatures from util.py, mclda.py. (vinnik-dmitry07, Apr 2, 2020)
38e4913  Update matplotlib import in mclda.py. (vinnik-dmitry07, Apr 2, 2020)
aafd635  Minor change in mclda.py: reformat. (vinnik-dmitry07, Apr 2, 2020)
e237d43  Update matplotlib calls in lda.py. (vinnik-dmitry07, Apr 2, 2020)
56a8f97  Minor changes to lda.py: reformat. (vinnik-dmitry07, Apr 2, 2020)
a5bbb76  Change "from x import .." to "import x as .." where x is numpy, shogun. (vinnik-dmitry07, Apr 2, 2020)
e9b52ea  Correct matplotlib, numpy, shogun import in classifier_gaussian_proce… (vinnik-dmitry07, Apr 3, 2020)
0c687e8  Minor changes to classifier_gaussian_process_binary_classification.py… (vinnik-dmitry07, Apr 3, 2020)
130f707  Fully fix all commit files. (vinnik-dmitry07, Apr 3, 2020)
98a741a  Fix link 2 (vinnik-dmitry07, Apr 3, 2020)
24fe869  Rename m_nupdates to m_num_updates. Changed the tolerance type: float… (vinnik-dmitry07, Apr 3, 2020)
9e2cd7c  Fixed the StochasticProximityEmbedding typos. (vinnik-dmitry07, Apr 3, 2020)
bc2d6d3  Reformat StochasticProximityEmbedding. (vinnik-dmitry07, Apr 3, 2020)
f3ffc72  The changes I am not sure you be glad to see =) (vinnik-dmitry07, Apr 3, 2020)
2a63463  sg.BinaryLabels -> sg.labels. Fix spe_helix example. (vinnik-dmitry07, Apr 3, 2020)
e78f24d  Minor changes + update kernel_ridge_regression.py, kernel_ridge_regre… (vinnik-dmitry07, Apr 4, 2020)
fb319cb  Compound the examples of converter algorithms. (vinnik-dmitry07, Apr 4, 2020)
4b05ad4  Minor changes. (vinnik-dmitry07, Apr 4, 2020)
38f7d85  Fix the transfer from BinaryLabels. (vinnik-dmitry07, Apr 4, 2020)
dc1b03a  Replace put with kwargs. (vinnik-dmitry07, Apr 4, 2020)
66247d7  Correct the formula 2. (vinnik-dmitry07, Apr 4, 2020)
486edda  Revert changing to override and default. (vinnik-dmitry07, Apr 8, 2020)
282fa77  Trigger CI. (vinnik-dmitry07, Apr 11, 2020)
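
The recurring pattern across these commits: wildcard imports and explicit setter calls are replaced by a single module import plus factory functions taking keyword arguments. A minimal before/after sketch, using names taken from the diffs below (assumes a shogun build where the sg.machine factory is available):

import numpy as np
import shogun as sg

labels = sg.BinaryLabels(np.array([1.0, -1.0, 1.0, -1.0]))

# old style (removed by this PR): construct, then call setters
#   perceptron = Perceptron(feats_train, labels)
#   perceptron.set_learn_rate(1.0)
#   perceptron.set_max_iter(1000)
# new style (commit dc1b03a, "Replace put with kwargs"): one factory call
perceptron = sg.machine('Perceptron', labels=labels, learn_rate=1.0, max_iterations=1000)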
classifier_gaussian_process_binary_classification.py
@@ -1,61 +1,52 @@
 # This software is distributed under BSD 3-clause license (see LICENSE file).
 #
 # Authors: Roman Votyakov
+import itertools

-from pylab import *
-from numpy import *
-from itertools import *
+import matplotlib.pyplot as plt
+import numpy as np

-def generate_toy_data(n_train=100, mean_a=asarray([0, 0]), std_dev_a=1.0, mean_b=3, std_dev_b=0.5):
+
+def generate_toy_data(n_train=100, mean_a=np.asarray([0, 0]), std_dev_a=1.0, mean_b=3, std_dev_b=0.5):
     # positive examples are distributed normally
-    X1 = (random.randn(n_train, 2)*std_dev_a+mean_a).T
+    X1 = (np.random.randn(n_train, 2) * std_dev_a + mean_a).T

     # negative examples have a "ring"-like form
-    r = random.randn(n_train)*std_dev_b+mean_b
-    angle = random.randn(n_train)*2*pi
-    X2 = array([r*cos(angle)+mean_a[0], r*sin(angle)+mean_a[1]])
+    r = np.random.randn(n_train) * std_dev_b + mean_b
+    angle = np.random.randn(n_train) * 2 * np.pi
+    X2 = np.array([r * np.cos(angle) + mean_a[0], r * np.sin(angle) + mean_a[1]])

     # stack positive and negative examples in a single array
-    X_train = hstack((X1,X2))
+    X_train = np.hstack((X1, X2))

     # label positive examples with +1, negative with -1
-    y_train = zeros(n_train*2)
+    y_train = np.zeros(n_train * 2)
     y_train[:n_train] = 1
     y_train[n_train:] = -1

     return [X_train, y_train]

-def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50):
-
-    # import all necessary modules from Shogun (some of them require Eigen3)
-    try:
-        from shogun import RealFeatures, BinaryLabels, GaussianKernel, \
-            LogitLikelihood, ProbitLikelihood, ZeroMean, SingleLaplacianInferenceMethod, \
-            EPInferenceMethod, GaussianProcessClassification
-    except ImportError:
-        print('Eigen3 needed for Gaussian Processes')
-        return
+
+def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50):
+    import shogun as sg
+    import numpy as np

     # convert training data into Shogun representation
-    train_features = RealFeatures(X_train)
-    train_labels = BinaryLabels(y_train)
+    train_features = sg.features(X_train)
+    train_labels = sg.BinaryLabels(y_train)
Review comment (Member): sg.labels should work

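For reference, a minimal sketch of the factory the reviewer suggests, assuming sg.labels infers BinaryLabels from a +/-1 vector the way the meta examples do (commit 2a63463 later applies this change elsewhere in the PR):

import numpy as np
import shogun as sg

y_train = np.array([1.0, 1.0, -1.0, -1.0])
train_labels = sg.labels(y_train)  # factory form; sg.BinaryLabels(y_train) is the explicit equivalent
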
     # generate all pairs in 2d range of testing data
-    x1 = linspace(X_train[0,:].min()-1, X_train[0,:].max()+1, n_test)
-    x2 = linspace(X_train[1,:].min()-1, X_train[1,:].max()+1, n_test)
-    X_test = asarray(list(product(x1, x2))).T
+    x1 = np.linspace(X_train[0, :].min() - 1, X_train[0, :].max() + 1, n_test)
+    x2 = np.linspace(X_train[1, :].min() - 1, X_train[1, :].max() + 1, n_test)
+    X_test = np.asarray(list(itertools.product(x1, x2))).T

     # convert testing features into Shogun representation
-    test_features = RealFeatures(X_test)
+    test_features = sg.features(X_test)

     # create Gaussian kernel with width = 2.0
-    kernel = sg.kernel("GaussianKernel", log_width=np.log(2.0))
+    kernel = sg.kernel('GaussianKernel', log_width=np.log(2.0))

     # create zero mean function
-    mean = ZeroMean()
+    mean = sg.ZeroMean()
Review comment (Member): is there a factory for mean functions? I think so, as we have a gp meta example
Reply (Contributor Author): I did not find one.

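The asymmetry behind this exchange, side by side (both right-hand lines appear in the diff above): kernels go through a string-dispatched factory, while the mean function is still constructed directly because no equivalent factory was found:

import numpy as np
import shogun as sg

kernel = sg.kernel('GaussianKernel', log_width=np.log(2.0))  # factory, string-dispatched
mean = sg.ZeroMean()  # direct constructor; no mean-function factory was found
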
     # you can easily switch between probit and logit likelihood models
     # by uncommenting/commenting the following lines:
@@ -64,7 +55,7 @@ def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50):
     # lik = ProbitLikelihood()

     # create logit likelihood model
-    lik = LogitLikelihood()
+    lik = sg.LogitLikelihood()

     # you can easily switch between Laplace and EP approximation by
     # uncommenting/commenting the following lines:
@@ -73,35 +64,35 @@ def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50):
     # inf = SingleLaplacianInferenceMethod(kernel, train_features, mean, train_labels, lik)

     # specify EP approximation inference method
-    inf = EPInferenceMethod(kernel, train_features, mean, train_labels, lik)
+    inf = sg.EPInferenceMethod(kernel, train_features, mean, train_labels, lik)

     # create and train GP classifier, which uses Laplace approximation
-    gp = GaussianProcessClassification(inf)
+    gp = sg.GaussianProcessClassification(inf)
Review comment (Member): all those should have a factory, meta examples should guide you
Reply (Contributor Author): [image]
Reply (Contributor Author, Apr 3, 2020): How to run this method from the machine? (get_probabilities)
Reply (Member): just checked, it is not possible currently, and we need to refactor this stuff (meta example is also not yet ported to new api I realised). Ignore my comment then

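What the thread above describes, as a hedged sketch: a machine built through the generic sg.machine factory comes back as a base Machine handle, so subclass-only methods such as get_probabilities are not reachable from it, and the example keeps the direct constructors. The factory call below is hypothetical (per the reviewer it does not work yet); inf and test_features are the names from the example:

# hypothetical factory form, not currently possible per the review thread:
#   gp = sg.machine('GaussianProcessClassification', ...)
#   gp.get_probabilities(test_features)  # would fail: base Machine lacks this method
# what the example actually does:
gp = sg.GaussianProcessClassification(inf)
gp.train()
p_test = gp.get_probabilities(test_features)
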
     gp.train()

     # get probabilities p(y*=1|x*) for each testing feature x*
     p_test = gp.get_probabilities(test_features)

     # create figure
-    figure()
-    title('Training examples, predictive probability and decision boundary')
+    plt.title('Training examples, predictive probability and decision boundary')

     # plot training data
-    plot(X_train[0, argwhere(y_train == 1)], X_train[1, argwhere(y_train == 1)], 'ro')
-    plot(X_train[0, argwhere(y_train == -1)], X_train[1, argwhere(y_train == -1)], 'bo')
+    plt.plot(X_train[0, np.argwhere(y_train == 1)], X_train[1, np.argwhere(y_train == 1)], 'ro')
+    plt.plot(X_train[0, np.argwhere(y_train == -1)], X_train[1, np.argwhere(y_train == -1)], 'bo')

     # plot decision boundary
-    contour(x1, x2, reshape(p_test, (n_test, n_test)), levels=[0.5], colors=('black'))
+    plt.contour(x1, x2, np.reshape(p_test, (n_test, n_test)), levels=[0.5], colors='black')

     # plot probabilities
-    pcolor(x1, x2, reshape(p_test, (n_test, n_test)))
+    plt.pcolor(x1, x2, np.reshape(p_test, (n_test, n_test)))

     # show color bar
-    colorbar()
+    plt.colorbar()

     # show figure
-    show()
+    plt.show()


-if __name__=='__main__':
-    [X_train, y_train] = generate_toy_data()
+if __name__ == '__main__':
+    X_train, y_train = generate_toy_data()
     gaussian_process_binary_classification_laplace(X_train, y_train)
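
A note for readers following the plotting calls: the boundary drawn by plt.contour is the level set p(y*=1|x*) = 0.5 of the predictive probabilities, which the example evaluates on an n_test x n_test grid built with itertools.product. A self-contained sketch of the reshape-and-threshold logic, with toy probabilities standing in for gp.get_probabilities:

import numpy as np

n_test = 2
x1 = np.linspace(0.0, 1.0, n_test)
x2 = np.linspace(0.0, 1.0, n_test)
p_test = np.array([0.1, 0.4, 0.6, 0.9])  # toy stand-in for the flattened p(y*=1|x*) values
grid = np.reshape(p_test, (n_test, n_test))
# points with grid > 0.5 fall on the +1 side; the 0.5 contour is the decision boundary
print(grid > 0.5)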
classifier_perceptron_graphical.py
@@ -1,78 +1,75 @@
 #!/usr/bin/env python

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 import latex_plot_inits

-parameter_list = [[20, 5, 1., 1000, 1, None, 5], [100, 5, 1., 1000, 1, None, 10]]
+parameter_list = [[20, 5, 1, 1000, 1, None, 5], [100, 5, 1, 1000, 1, None, 10]]

-def classifier_perceptron_graphical(n=100, distance=5, learn_rate=1., max_iter=1000, num_threads=1, seed=None, nperceptrons=5):
-    from shogun import RealFeatures, BinaryLabels
-    from shogun import Perceptron
-    from shogun import MSG_INFO
+
+def classifier_perceptron_graphical(n=100, distance=5, learn_rate=1, max_iter=1000, num_threads=1, seed=None,
+                                    nperceptrons=5):
+    import shogun as sg

     # 2D data
     _DIM = 2

     # To get the nice message that the perceptron has converged
-    dummy = BinaryLabels()
-    dummy.io.set_loglevel(MSG_INFO)
+    dummy = sg.BinaryLabels()
+    # dummy.io.set_loglevel(sg.MSG_INFO)

     np.random.seed(seed)

     # Produce some (probably) linearly separable training data by hand
     # Two Gaussians at a far enough distance
-    X = np.array(np.random.randn(_DIM,n))+distance
-    Y = np.array(np.random.randn(_DIM,n))
+    X = np.array(np.random.randn(_DIM, n)) + distance
+    Y = np.array(np.random.randn(_DIM, n))
     label_train_twoclass = np.hstack((np.ones(n), -np.ones(n)))

-    fm_train_real = np.hstack((X,Y))
-    feats_train = RealFeatures(fm_train_real)
-    labels = BinaryLabels(label_train_twoclass)
+    fm_train_real = np.hstack((X, Y))
+    feats_train = sg.features(fm_train_real)
+    labels = sg.BinaryLabels(label_train_twoclass)

-    perceptron = Perceptron(feats_train, labels)
-    perceptron.set_learn_rate(learn_rate)
-    perceptron.set_max_iter(max_iter)
-    perceptron.set_initialize_hyperplane(False)
+    perceptron = sg.machine('Perceptron', labels=labels, learn_rate=learn_rate, max_iterations=max_iter)
+    perceptron.put('initialize_hyperplane', False)

     # Find limits for visualization
-    x_min = min(np.min(X[0,:]), np.min(Y[0,:]))
-    x_max = max(np.max(X[0,:]), np.max(Y[0,:]))
+    x_min = min(np.min(X[0, :]), np.min(Y[0, :]))
+    x_max = max(np.max(X[0, :]), np.max(Y[0, :]))

-    y_min = min(np.min(X[1,:]), np.min(Y[1,:]))
-    y_max = max(np.max(X[1,:]), np.max(Y[1,:]))
+    y_min = min(np.min(X[1, :]), np.min(Y[1, :]))
+    y_max = max(np.max(X[1, :]), np.max(Y[1, :]))

-    for i in xrange(nperceptrons):
+    for i in range(nperceptrons):
         # Initialize randomly weight vector and bias
-        perceptron.set_w(np.random.random(2))
-        perceptron.set_bias(np.random.random())
+        perceptron.put('w', np.random.random(2))
+        perceptron.put('bias', np.random.random())

         # Run the perceptron algorithm
-        perceptron.train()
+        perceptron.train(feats_train)

         # Construct the hyperplane for visualization
         # Equation of the decision boundary is w^T x + b = 0
-        b = perceptron.get_bias()
-        w = perceptron.get_w()
+        b = perceptron.get('bias')
+        w = perceptron.get('w')

-        hx = np.linspace(x_min-1,x_max+1)
-        hy = -w[1]/w[0] * hx
+        hx = np.linspace(x_min - 1, x_max + 1)
+        hy = -w[1] / w[0] * hx

-        plt.plot(hx, -1/w[1]*(w[0]*hx+b))
+        plt.plot(hx, -1 / w[1] * (w[0] * hx + b))

     # Plot the two-class data
-    plt.scatter(X[0,:], X[1,:], s=40, marker='o', facecolors='none', edgecolors='b')
-    plt.scatter(Y[0,:], Y[1,:], s=40, marker='s', facecolors='none', edgecolors='r')
+    plt.scatter(X[0, :], X[1, :], s=40, marker='o', facecolors='none', edgecolors='b')
+    plt.scatter(Y[0, :], Y[1, :], s=40, marker='s', facecolors='none', edgecolors='r')

     # Customize the plot
-    plt.axis([x_min-1, x_max+1, y_min-1, y_max+1])
+    plt.axis([x_min - 1, x_max + 1, y_min - 1, y_max + 1])
     plt.title('Rosenblatt\'s Perceptron Algorithm')
     plt.xlabel('x')
     plt.ylabel('y')
     plt.show()

     return perceptron

-if __name__=='__main__':
+
+if __name__ == '__main__':
     print('Perceptron graphical')
     classifier_perceptron_graphical(*parameter_list[0])
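
One step the perceptron example leaves implicit: with the boundary defined by w^T x + b = 0, solving for the second coordinate gives y = -(w[0] * x + b) / w[1], which is exactly the expression passed to plt.plot; the intermediate hy = -w[1] / w[0] * hx appears to be unused on both sides of the diff. A sketch of the equivalent, arguably clearer form, with toy values standing in for the trained w and b:

import numpy as np
import matplotlib.pyplot as plt

w = np.array([0.4, 0.7])  # toy weight vector
b = 0.1                   # toy bias
hx = np.linspace(-1.0, 1.0)
hy = -(w[0] * hx + b) / w[1]  # y solved from w[0]*x + w[1]*y + b = 0
plt.plot(hx, hy)
plt.show()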
examples/undocumented/python/graphical/cluster_kmeans.py (63 changes: 33 additions & 30 deletions)
@@ -1,36 +1,39 @@
-from pylab import figure,clf,plot,linspace,pi,show
-from numpy import ones,zeros,cos,sin,concatenate
-from numpy.random import randn
-
-from shogun import *
-
-k=4
-num=1000
-iter=50000
-dist=2.2
-traindat=concatenate((concatenate((randn(1,num)-dist, randn(1,2*num)+dist, randn(1,num)+2*dist),1), concatenate((randn(1,num), randn(1,2*num)+dist, randn(1,num)-dist),1)),0)
-
-trainlab=concatenate((ones(num), 2*ones(num), 3*ones(num), 4*ones(num)))
-
-feats_train=RealFeatures(traindat)
-distance=EuclideanDistance(feats_train, feats_train)
-kmeans=KMeans(k, distance)
+import matplotlib.pyplot as plt
+import numpy as np
+
+import shogun as sg
+
+k = 4
+num = 1000
+iter = 50000
+dist = 2.2
+traindat = np.concatenate((np.concatenate(
+    (np.random.randn(1, num) - dist, np.random.randn(1, 2 * num) + dist, np.random.randn(1, num) + 2 * dist), 1),
+    np.concatenate((np.random.randn(1, num), np.random.randn(1, 2 * num) + dist,
+                    np.random.randn(1, num) - dist), 1)), 0)
+
+trainlab = np.concatenate((np.ones(num), 2 * np.ones(num), 3 * np.ones(num), 4 * np.ones(num)))
+
+feats_train = sg.features(traindat)
+distance = sg.distance('EuclideanDistance')
+distance.init(feats_train, feats_train)
+kmeans = sg.machine('KMeans', k=k, distance=distance)
 kmeans.train()

-centers = kmeans.get_cluster_centers()
-radi=kmeans.get_radiuses()
+centers = kmeans.get('cluster_centers')
+radi = kmeans.get('radiuses')

-figure()
-clf()
-plot(traindat[0,trainlab==+1], traindat[1,trainlab==+1],'rx')
-plot(traindat[0,trainlab==+2], traindat[1,trainlab==+2],'bx', hold=True)
-plot(traindat[0,trainlab==+3], traindat[1,trainlab==+3],'gx', hold=True)
-plot(traindat[0,trainlab==+4], traindat[1,trainlab==+4],'cx', hold=True)
+plt.figure()
+plt.clf()
+plt.plot(traindat[0, trainlab == +1], traindat[1, trainlab == +1], 'rx')
+plt.plot(traindat[0, trainlab == +2], traindat[1, trainlab == +2], 'bx')
+plt.plot(traindat[0, trainlab == +3], traindat[1, trainlab == +3], 'gx')
+plt.plot(traindat[0, trainlab == +4], traindat[1, trainlab == +4], 'cx')

-plot(centers[0,:], centers[1,:], 'ko', hold=True)
+plt.plot(centers[0, :], centers[1, :], 'ko')

-for i in xrange(k):
-    t = linspace(0, 2*pi, 100)
-    plot(radi[i]*cos(t)+centers[0,i],radi[i]*sin(t)+centers[1,i],'k-', hold=True)
+for i in range(k):
+    t = np.linspace(0, 2 * np.pi, 100)
+    plt.plot(radi[i] * np.cos(t) + centers[0, i], radi[i] * np.sin(t) + centers[1, i], 'k-')

-show()
+plt.show()
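
On the dropped hold=True arguments: matplotlib deprecated the hold state in 2.0 and removed it in 3.0; successive plt.plot calls now draw onto the current axes by default, so the keyword can simply be deleted, as this diff does. A minimal illustration:

import matplotlib.pyplot as plt

plt.plot([0, 1], [0, 1], 'rx')  # first series
plt.plot([0, 1], [1, 0], 'bx')  # second series accumulates on the same axes
plt.show()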