Update the undocumented graphical python examples (#4989)
* Fix k-d tree link.
* Fix formula in KernelDensity description.
* Remove RealFeatures from util.py, mclda.py.
* Update matplotlib import in mclda.py.
* Minor change in mclda.py: reformat.
* Update matplotlib calls in lda.py.
* Minor changes to lda.py: reformat
* Change "from x import .." to "import x as .." where x is numpy, shogun.
* Correct matplotlib, numpy, shogun import in classifier_gaussian_process_binary_classification.py. Remove RealFeatures.
* Minor changes to classifier_gaussian_process_binary_classification.py: reformat.
* sg.BinaryLabels -> sg.labels. Fix spe_helix example.
* Minor changes + update kernel_ridge_regression.py, kernel_ridge_regression_sinc.py
* Consolidate the examples of converter algorithms.
* Replace put with kwargs.
vinnik-dmitry07 authored Apr 11, 2020
1 parent 951d5c7 commit 9d625d8
Showing 22 changed files with 543 additions and 859 deletions.
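
Most of the edits below follow one pattern: Shogun's explicit wrapper classes (RealFeatures, BinaryLabels, Perceptron, ...) and setter calls are replaced by the string-based factory functions (sg.features, sg.labels, sg.machine, sg.kernel, sg.distance) with keyword arguments. A minimal before/after sketch of that style, using only calls that appear in the diffs below; the toy data and parameter values are illustrative, not taken from the examples:

import numpy as np
import shogun as sg

X = np.random.randn(2, 10)                        # 2 features x 10 samples (toy data)
y = np.where(np.random.randn(10) > 0, 1.0, -1.0)  # +/-1 labels

# old style (removed by this commit):
#   feats = RealFeatures(X)
#   labels = BinaryLabels(y)
#   perceptron = Perceptron(feats, labels)
#   perceptron.set_max_iter(1000)

# new style: factory functions plus keyword arguments
feats = sg.features(X)
labels = sg.labels(y)
perceptron = sg.machine('Perceptron', labels=labels, max_iterations=1000)
perceptron.train(feats)
print(perceptron.get('w'), perceptron.get('bias'))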
@@ -1,61 +1,52 @@
 # This software is distributed under BSD 3-clause license (see LICENSE file).
 #
 # Authors: Roman Votyakov
+import itertools
 
-from pylab import *
-from numpy import *
-from itertools import *
+import matplotlib.pyplot as plt
+import numpy as np
 
-def generate_toy_data(n_train=100, mean_a=asarray([0, 0]), std_dev_a=1.0, mean_b=3, std_dev_b=0.5):
+
+def generate_toy_data(n_train=100, mean_a=np.asarray([0, 0]), std_dev_a=1.0, mean_b=3, std_dev_b=0.5):
     # positive examples are distributed normally
-    X1 = (random.randn(n_train, 2)*std_dev_a+mean_a).T
+    X1 = (np.random.randn(n_train, 2) * std_dev_a + mean_a).T
 
     # negative examples have a "ring"-like form
-    r = random.randn(n_train)*std_dev_b+mean_b
-    angle = random.randn(n_train)*2*pi
-    X2 = array([r*cos(angle)+mean_a[0], r*sin(angle)+mean_a[1]])
+    r = np.random.randn(n_train) * std_dev_b + mean_b
+    angle = np.random.randn(n_train) * 2 * np.pi
+    X2 = np.array([r * np.cos(angle) + mean_a[0], r * np.sin(angle) + mean_a[1]])
 
     # stack positive and negative examples in a single array
-    X_train = hstack((X1,X2))
+    X_train = np.hstack((X1, X2))
 
     # label positive examples with +1, negative with -1
-    y_train = zeros(n_train*2)
+    y_train = np.zeros(n_train * 2)
     y_train[:n_train] = 1
     y_train[n_train:] = -1
 
     return [X_train, y_train]
 
-def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50):
-
-    # import all necessary modules from Shogun (some of them require Eigen3)
-    try:
-        from shogun import RealFeatures, BinaryLabels, GaussianKernel, \
-            LogitLikelihood, ProbitLikelihood, ZeroMean, SingleLaplacianInferenceMethod, \
-            EPInferenceMethod, GaussianProcessClassification
-    except ImportError:
-        print('Eigen3 needed for Gaussian Processes')
-        return
+
+def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50):
+    import shogun as sg
+    import numpy as np
 
     # convert training data into Shogun representation
-    train_features = RealFeatures(X_train)
-    train_labels = BinaryLabels(y_train)
+    train_features = sg.features(X_train)
+    train_labels = sg.labels(y_train)
 
     # generate all pairs in 2d range of testing data
-    x1 = linspace(X_train[0,:].min()-1, X_train[0,:].max()+1, n_test)
-    x2 = linspace(X_train[1,:].min()-1, X_train[1,:].max()+1, n_test)
-    X_test = asarray(list(product(x1, x2))).T
+    x1 = np.linspace(X_train[0, :].min() - 1, X_train[0, :].max() + 1, n_test)
+    x2 = np.linspace(X_train[1, :].min() - 1, X_train[1, :].max() + 1, n_test)
+    X_test = np.asarray(list(itertools.product(x1, x2))).T
 
     # convert testing features into Shogun representation
-    test_features = RealFeatures(X_test)
+    test_features = sg.features(X_test)
 
     # create Gaussian kernel with width = 2.0
-    kernel = sg.kernel("GaussianKernel", log_width=np.log(2.0))
+    kernel = sg.kernel('GaussianKernel', log_width=np.log(2.0))
 
     # create zero mean function
-    mean = ZeroMean()
+    mean = sg.ZeroMean()
 
     # you can easily switch between probit and logit likelihood models
     # by uncommenting/commenting the following lines:
@@ -64,7 +55,7 @@ def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50):
     # lik = ProbitLikelihood()
 
     # create logit likelihood model
-    lik = LogitLikelihood()
+    lik = sg.LogitLikelihood()
 
     # you can easily switch between Laplace and EP approximation by
     # uncommenting/commenting the following lines:
@@ -73,35 +64,36 @@ def gaussian_process_binary_classification_laplace(X_train, y_train, n_test=50):
     # inf = SingleLaplacianInferenceMethod(kernel, train_features, mean, train_labels, lik)
 
     # specify EP approximation inference method
-    inf = EPInferenceMethod(kernel, train_features, mean, train_labels, lik)
+    inf = sg.EPInferenceMethod(kernel, train_features, mean, train_labels, lik)
 
     # create and train GP classifier, which uses Laplace approximation
-    gp = GaussianProcessClassification(inf)
+    # gp = sg.machine('GaussianProcessClassification', inference_method=inf, labels=train_labels)
+    gp = sg.GaussianProcessClassification(inf)
     gp.train()
 
     # get probabilities p(y*=1|x*) for each testing feature x*
     p_test = gp.get_probabilities(test_features)
 
     # create figure
-    figure()
-    title('Training examples, predictive probability and decision boundary')
+    plt.title('Training examples, predictive probability and decision boundary')
 
     # plot training data
-    plot(X_train[0, argwhere(y_train == 1)], X_train[1, argwhere(y_train == 1)], 'ro')
-    plot(X_train[0, argwhere(y_train == -1)], X_train[1, argwhere(y_train == -1)], 'bo')
+    plt.plot(X_train[0, np.argwhere(y_train == 1)], X_train[1, np.argwhere(y_train == 1)], 'ro')
+    plt.plot(X_train[0, np.argwhere(y_train == -1)], X_train[1, np.argwhere(y_train == -1)], 'bo')
 
     # plot decision boundary
-    contour(x1, x2, reshape(p_test, (n_test, n_test)), levels=[0.5], colors=('black'))
+    plt.contour(x1, x2, np.reshape(p_test, (n_test, n_test)), levels=[0.5], colors='black')
 
     # plot probabilities
-    pcolor(x1, x2, reshape(p_test, (n_test, n_test)))
+    plt.pcolor(x1, x2, np.reshape(p_test, (n_test, n_test)))
 
     # show color bar
-    colorbar()
+    plt.colorbar()
 
     # show figure
-    show()
+    plt.show()
 
-if __name__=='__main__':
-    [X_train, y_train] = generate_toy_data()
+
+if __name__ == '__main__':
+    X_train, y_train = generate_toy_data()
     gaussian_process_binary_classification_laplace(X_train, y_train)
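
The comments in this example note that the likelihood model and the approximate inference method can be swapped independently. A minimal self-contained sketch of the probit/Laplace variant mentioned in the commented-out lines; it assumes ProbitLikelihood and SingleLaplacianInferenceMethod are exposed under the sg namespace like the other classes used here, and the toy data is made up for illustration:

import numpy as np
import shogun as sg

# illustrative 2-D binary data: 2 features x 20 samples
X = np.random.randn(2, 20)
y = np.where(X[0, :] + X[1, :] > 0, 1.0, -1.0)

train_features = sg.features(X)
train_labels = sg.labels(y)

kernel = sg.kernel('GaussianKernel', log_width=np.log(2.0))
mean = sg.ZeroMean()

# probit likelihood and Laplace approximation instead of logit + EP
lik = sg.ProbitLikelihood()
inf = sg.SingleLaplacianInferenceMethod(kernel, train_features, mean, train_labels, lik)

gp = sg.GaussianProcessClassification(inf)
gp.train()
p_train = gp.get_probabilities(train_features)  # p(y=1|x) at the training points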
@@ -1,78 +1,70 @@
 #!/usr/bin/env python
 
-import numpy as np
 import matplotlib.pyplot as plt
 import latex_plot_inits
+import numpy as np
 
-parameter_list = [[20, 5, 1., 1000, 1, None, 5], [100, 5, 1., 1000, 1, None, 10]]
+parameter_list = [[20, 5, 1, 1000, 1, None, 5], [100, 5, 1, 1000, 1, None, 10]]
 
-def classifier_perceptron_graphical(n=100, distance=5, learn_rate=1., max_iter=1000, num_threads=1, seed=None, nperceptrons=5):
-    from shogun import RealFeatures, BinaryLabels
-    from shogun import Perceptron
-    from shogun import MSG_INFO
+
+def classifier_perceptron_graphical(n=100, distance=5, learn_rate=1, max_iter=1000, num_threads=1, seed=None,
+                                    nperceptrons=5):
+    import shogun as sg
 
     # 2D data
     _DIM = 2
 
-    # To get the nice message that the perceptron has converged
-    dummy = BinaryLabels()
-    dummy.io.set_loglevel(MSG_INFO)
-
     np.random.seed(seed)
 
     # Produce some (probably) linearly separable training data by hand
     # Two Gaussians at a far enough distance
-    X = np.array(np.random.randn(_DIM,n))+distance
-    Y = np.array(np.random.randn(_DIM,n))
+    X = np.array(np.random.randn(_DIM, n)) + distance
+    Y = np.array(np.random.randn(_DIM, n))
     label_train_twoclass = np.hstack((np.ones(n), -np.ones(n)))
 
-    fm_train_real = np.hstack((X,Y))
-    feats_train = RealFeatures(fm_train_real)
-    labels = BinaryLabels(label_train_twoclass)
+    fm_train_real = np.hstack((X, Y))
+    feats_train = sg.features(fm_train_real)
+    labels = sg.labels(label_train_twoclass)
 
-    perceptron = Perceptron(feats_train, labels)
-    perceptron.set_learn_rate(learn_rate)
-    perceptron.set_max_iter(max_iter)
-    perceptron.set_initialize_hyperplane(False)
+    perceptron = sg.machine('Perceptron', labels=labels, learn_rate=learn_rate, max_iterations=max_iter,
+                            initialize_hyperplane=False)
 
     # Find limits for visualization
-    x_min = min(np.min(X[0,:]), np.min(Y[0,:]))
-    x_max = max(np.max(X[0,:]), np.max(Y[0,:]))
+    x_min = min(np.min(X[0, :]), np.min(Y[0, :]))
+    x_max = max(np.max(X[0, :]), np.max(Y[0, :]))
 
-    y_min = min(np.min(X[1,:]), np.min(Y[1,:]))
-    y_max = max(np.max(X[1,:]), np.max(Y[1,:]))
+    y_min = min(np.min(X[1, :]), np.min(Y[1, :]))
+    y_max = max(np.max(X[1, :]), np.max(Y[1, :]))
 
-    for i in xrange(nperceptrons):
+    for i in range(nperceptrons):
         # Initialize randomly weight vector and bias
-        perceptron.set_w(np.random.random(2))
-        perceptron.set_bias(np.random.random())
+        perceptron.put('w', np.random.random(2))
+        perceptron.put('bias', np.random.random())
 
         # Run the perceptron algorithm
-        perceptron.train()
+        perceptron.train(feats_train)
 
         # Construct the hyperplane for visualization
        # Equation of the decision boundary is w^T x + b = 0
-        b = perceptron.get_bias()
-        w = perceptron.get_w()
+        b = perceptron.get('bias')
+        w = perceptron.get('w')
 
-        hx = np.linspace(x_min-1,x_max+1)
-        hy = -w[1]/w[0] * hx
+        hx = np.linspace(x_min - 1, x_max + 1)
+        hy = -w[1] / w[0] * hx
 
-        plt.plot(hx, -1/w[1]*(w[0]*hx+b))
+        plt.plot(hx, -1 / w[1] * (w[0] * hx + b))
 
     # Plot the two-class data
-    plt.scatter(X[0,:], X[1,:], s=40, marker='o', facecolors='none', edgecolors='b')
-    plt.scatter(Y[0,:], Y[1,:], s=40, marker='s', facecolors='none', edgecolors='r')
+    plt.scatter(X[0, :], X[1, :], s=40, marker='o', facecolors='none', edgecolors='b')
+    plt.scatter(Y[0, :], Y[1, :], s=40, marker='s', facecolors='none', edgecolors='r')
 
     # Customize the plot
-    plt.axis([x_min-1, x_max+1, y_min-1, y_max+1])
+    plt.axis([x_min - 1, x_max + 1, y_min - 1, y_max + 1])
     plt.title('Rosenblatt\'s Perceptron Algorithm')
     plt.xlabel('x')
     plt.ylabel('y')
     plt.show()
 
     return perceptron
 
-if __name__=='__main__':
+
+if __name__ == '__main__':
     print('Perceptron graphical')
     classifier_perceptron_graphical(*parameter_list[0])
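
The plt.plot call inside the loop follows directly from the comment above it: points on the decision boundary satisfy w^T x + b = 0, so for a grid of first coordinates hx the second coordinate is -(w[0] * hx + b) / w[1] (the hy variable the example computes is not the expression that actually gets plotted). A tiny standalone sketch of just that step, with an arbitrary weight vector and bias chosen for illustration:

import numpy as np
import matplotlib.pyplot as plt

w = np.array([0.7, 0.3])   # illustrative weight vector
b = -2.0                   # illustrative bias

hx = np.linspace(-5, 5)
# on the boundary: w[0]*x1 + w[1]*x2 + b = 0  =>  x2 = -(w[0]*x1 + b) / w[1]
boundary = -(w[0] * hx + b) / w[1]

plt.plot(hx, boundary)
plt.show()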
examples/undocumented/python/graphical/cluster_kmeans.py (63 changes: 33 additions & 30 deletions)
@@ -1,36 +1,39 @@
-from pylab import figure,clf,plot,linspace,pi,show
-from numpy import ones,zeros,cos,sin,concatenate
-from numpy.random import randn
-
-from shogun import *
-
-k=4
-num=1000
-iter=50000
-dist=2.2
-traindat=concatenate((concatenate((randn(1,num)-dist, randn(1,2*num)+dist, randn(1,num)+2*dist),1), concatenate((randn(1,num), randn(1,2*num)+dist, randn(1,num)-dist),1)),0)
-
-trainlab=concatenate((ones(num), 2*ones(num), 3*ones(num), 4*ones(num)))
-
-feats_train=RealFeatures(traindat)
-distance=EuclideanDistance(feats_train, feats_train)
-kmeans=KMeans(k, distance)
+import matplotlib.pyplot as plt
+import numpy as np
+
+import shogun as sg
+
+k = 4
+num = 1000
+iter = 50000
+dist = 2.2
+traindat = np.concatenate((np.concatenate(
+    (np.random.randn(1, num) - dist, np.random.randn(1, 2 * num) + dist, np.random.randn(1, num) + 2 * dist), 1),
+    np.concatenate((np.random.randn(1, num), np.random.randn(1, 2 * num) + dist,
+                    np.random.randn(1, num) - dist), 1)), 0)
+
+trainlab = np.concatenate((np.ones(num), 2 * np.ones(num), 3 * np.ones(num), 4 * np.ones(num)))
+
+feats_train = sg.features(traindat)
+distance = sg.distance('EuclideanDistance')
+distance.init(feats_train, feats_train)
+kmeans = sg.machine('KMeans', k=k, distance=distance)
 kmeans.train()
 
-centers = kmeans.get_cluster_centers()
-radi=kmeans.get_radiuses()
+centers = kmeans.get('cluster_centers')
+radi = kmeans.get('radiuses')
 
-figure()
-clf()
-plot(traindat[0,trainlab==+1], traindat[1,trainlab==+1],'rx')
-plot(traindat[0,trainlab==+2], traindat[1,trainlab==+2],'bx', hold=True)
-plot(traindat[0,trainlab==+3], traindat[1,trainlab==+3],'gx', hold=True)
-plot(traindat[0,trainlab==+4], traindat[1,trainlab==+4],'cx', hold=True)
+plt.figure()
+plt.clf()
+plt.plot(traindat[0, trainlab == +1], traindat[1, trainlab == +1], 'rx')
+plt.plot(traindat[0, trainlab == +2], traindat[1, trainlab == +2], 'bx')
+plt.plot(traindat[0, trainlab == +3], traindat[1, trainlab == +3], 'gx')
+plt.plot(traindat[0, trainlab == +4], traindat[1, trainlab == +4], 'cx')
 
-plot(centers[0,:], centers[1,:], 'ko', hold=True)
+plt.plot(centers[0, :], centers[1, :], 'ko')
 
-for i in xrange(k):
-    t = linspace(0, 2*pi, 100)
-    plot(radi[i]*cos(t)+centers[0,i],radi[i]*sin(t)+centers[1,i],'k-', hold=True)
+for i in range(k):
+    t = np.linspace(0, 2 * np.pi, 100)
+    plt.plot(radi[i] * np.cos(t) + centers[0, i], radi[i] * np.sin(t) + centers[1, i], 'k-')
 
-show()
+plt.show()