Update T Process notebook (#705)
* Draft update of BNN notebook

* Pre-commit fixes

* Address reviewer comments

* Updated to v5

* Updated authorship

---------

Co-authored-by: Chris Fonnesbeck <[email protected]>
fonnesbeck and Chris Fonnesbeck authored Dec 20, 2024
1 parent 7575eee commit 9857594
Showing 2 changed files with 362 additions and 127 deletions.
396 changes: 304 additions & 92 deletions examples/gaussian_processes/GP-TProcess.ipynb

Large diffs are not rendered by default.

93 changes: 58 additions & 35 deletions examples/gaussian_processes/GP-TProcess.myst.md
kernelspec:
  name: python3
---

(GP-TProcess)=
# Student-t Process

:::{post} August 2017
:tags: t-process, gaussian process, bayesian non-parametrics
:category: intermediate
:author: Bill Engels
:::

PyMC also includes T-process priors. They are a generalization of a Gaussian process prior to the multivariate Student's T distribution. The usage is identical to that of `gp.Latent`, except they require a degrees of freedom parameter when they are specified in the model. For more information, see chapter 9 of [Rasmussen+Williams](http://www.gaussianprocess.org/gpml/), and [Shah et al.](https://arxiv.org/abs/1402.4306).

Note that T processes aren't additive in the same way as GPs, so addition of `TP` objects is not supported.
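
To make the parallel with `gp.Latent` concrete, here is a minimal sketch of the two constructors side by side; the hyperparameter values are illustrative assumptions, not part of the original notebook:

```{code-cell} ipython3
import pymc as pm

# The same covariance function can back either process. `gp.Latent` takes it
# as `cov_func`, while `gp.TP` takes it as `scale_func` and additionally
# requires a degrees-of-freedom parameter `nu`. Values here are assumptions.
cov = pm.gp.cov.ExpQuad(1, ls=1.0)
gp = pm.gp.Latent(cov_func=cov)
tp = pm.gp.TP(scale_func=cov, nu=5)
```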

## Samples from a TP prior
The following code draws samples from a T process prior with 3 degrees of freedom and a Gaussian process, both with the same covariance matrix.

```{code-cell} ipython3
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc as pm
import pytensor.tensor as pt
from pymc.gp.util import plot_gp_dist
%matplotlib inline
```

```{code-cell} ipython3
n = 100  # The number of data points
X = np.linspace(0, 10, n)[:, None] # The inputs to the GP, they must be arranged as a column vector
# Define the true covariance function and its parameters
ell_true = 1.0
eta_true = 3.0
cov_func = eta_true**2 * pm.gp.cov.Matern52(1, ell_true)
# A mean function that is zero everywhere
mean_func = pm.gp.mean.Zero()
# The latent function draws from the TP prior are samples from a multivariate Student's T
# (we call `eval()` because the mean and covariance are symbolic PyTensor tensors)
tp_samples = pm.draw(pm.MvStudentT.dist(mu=mean_func(X).eval(), scale=cov_func(X).eval(), nu=3), 8)
## Plot samples from TP prior
fig = plt.figure(figsize=(12, 5))
ax0 = fig.gca()
ax0.plot(X.flatten(), tp_samples.T, lw=3, alpha=0.6)
ax0.set_xlabel("X")
ax0.set_ylabel("y")
ax0.set_title("Samples from TP with DoF=3")
gp_samples = pm.draw(pm.MvNormal.dist(mu=mean_func(X).eval(), cov=cov_func(X).eval()), 8)
fig = plt.figure(figsize=(12, 5))
ax1 = fig.gca()
ax1.plot(X.flatten(), gp_samples.T, lw=3, alpha=0.6)
ax1.set_xlabel("X")
ax1.set_ylabel("y")
ax1.set_ylim(ax0.get_ylim())
ax1.set_title("Samples from GP");
```
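
The heavier tails are easy to see numerically as well. For `nu=3`, the TP's marginal variance is `nu/(nu-2) = 3` times that of the GP with the same covariance, so extreme values should be noticeably more common in the TP draws. A quick check on the samples above (an added aside, not in the original notebook):

```{code-cell} ipython3
# Compare the spread of the TP and GP draws; the TP's extremes are typically
# larger (a rough check on these particular draws, not a formal test)
print("TP draws: max |f| =", np.abs(tp_samples).max().round(2))
print("GP draws: max |f| =", np.abs(gp_samples).max().round(2))
```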

## Poisson data generated by a T process
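
The generating process used below can be written as (notation added here for clarity):

$$
f \sim \mathcal{TP}\left(0,\, k(x, x');\ \nu = 3\right), \qquad y_i \mid f \sim \operatorname{Poisson}\!\left(f(x_i)^2\right)
$$

Squaring the latent function guarantees a non-negative Poisson rate.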

```{code-cell} ipython3
n = 150  # The number of data points
X = np.linspace(0, 10, n)[:, None] # The inputs to the GP, they must be arranged as a column vector
# Define the true covariance function and its parameters
ell_true = 1.0
eta_true = 3.0
cov_func = eta_true**2 * pm.gp.cov.ExpQuad(1, ell_true)
# A mean function that is zero everywhere
mean_func = pm.gp.mean.Zero()
# The true latent function is one draw from a multivariate Student's T
# (we call `eval()` because the mean and covariance are symbolic PyTensor tensors)
f_true = pm.draw(pm.MvStudentT.dist(mu=mean_func(X).eval(), scale=cov_func(X).eval(), nu=3), 1)
y = np.random.poisson(f_true**2)
fig = plt.figure(figsize=(12, 5))
plt.plot(X, f_true**2, "dodgerblue", lw=3, label="True f")
plt.plot(X, y, "ok", ms=3, alpha=0.5, label="Observed data")
plt.xlabel("X")
plt.ylabel("y")
plt.legend();
```

```{code-cell} ipython3
with pm.Model() as model:
    ell = pm.Gamma("ell", alpha=2, beta=2)
    eta = pm.HalfCauchy("eta", beta=3)
    cov = eta**2 * pm.gp.cov.ExpQuad(1, ell)

    # informative prior on degrees of freedom < 5
    nu = pm.Gamma("nu", alpha=2, beta=1)
    tp = pm.gp.TP(scale_func=cov, nu=nu)
    f = tp.prior("f", X=X)

    pm.Poisson("y", mu=pt.square(f), observed=y)

    tr = pm.sample(target_accept=0.9, nuts_sampler="nutpie", chains=2)
```
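
The `nuts_sampler="nutpie"` option requires the optional `nutpie` package. If it isn't installed, a plain PyMC NUTS call is a drop-in alternative (shown commented out so the notebook doesn't sample twice):

```{code-cell} ipython3
# Fallback if nutpie is unavailable (an alternative, not the original run):
# with model:
#     tr = pm.sample(1000, target_accept=0.9, chains=2)
```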

```{code-cell} ipython3
az.plot_trace(tr, var_names=["ell", "nu", "eta"]);
```
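
A quick numerical convergence check on the hyperparameters complements the trace plots; this is an added aside using the standard ArviZ summary:

```{code-cell} ipython3
# Effective sample sizes and R-hat for the covariance and DoF hyperparameters
az.summary(tr, var_names=["ell", "nu", "eta"], round_to=2)
```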

```{code-cell} ipython3
n_new = 200
X_new = np.linspace(0, 15, n_new)[:, None]

# add the TP conditional to the model, given the new X values
with model:
    f_pred = tp.conditional("f_pred", X_new)

# Sample from the TP conditional distribution
with model:
    pm.sample_posterior_predictive(tr, var_names=["f_pred"], extend_inferencedata=True)
```
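
Because `extend_inferencedata=True` was used, the draws are appended to `tr` rather than returned. A quick way to confirm what was added (an added aside, assuming the standard ArviZ group layout):

```{code-cell} ipython3
# The conditional draws live in the posterior_predictive group of the trace,
# with dims (chain, draw, prediction points)
tr.posterior_predictive["f_pred"].dims
```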

```{code-cell} ipython3
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()
f_pred_samples = np.square(
    az.extract(tr.posterior_predictive).astype(np.float32)["f_pred"].values
).T
plot_gp_dist(ax, f_pred_samples, X_new)
plt.plot(X, np.square(f_true), "dodgerblue", lw=3, label="True f")
plt.plot(X, y, "ok", ms=3, alpha=0.5, label="Observed data")
plt.xlabel("X")
plt.ylabel("True f(x)")
plt.ylim([-2, 20])
plt.title("Conditional distribution of f_*, given f")
plt.legend();
```

## Authors

* Authored by Bill Engels
* Updated by Chris Fonnesbeck to use PyMC v5

+++

## References
:::{bibliography}
:filter: docname in docnames
:::

```{code-cell} ipython3
%load_ext watermark
%watermark -n -u -v -iv -w
```
