PC prior distribution for Student T dof #252
pymc_experimental/distributions/continuous.py
@@ -19,18 +19,28 @@
 The imports from pymc are not fully replicated here: add imports as necessary.
 """
 
-from typing import List, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 import numpy as np
 import pytensor.tensor as pt
-from pymc.distributions.dist_math import check_parameters
+from pymc.distributions.continuous import (
+    DIST_PARAMETER_TYPES,
+    PositiveContinuous,
+    check_parameters,
+)
 from pymc.distributions.distribution import Continuous
 from pymc.distributions.shape_utils import rv_size_is_none
 from pymc.pytensorf import floatX
-from pytensor.tensor import TensorVariable
 from pytensor.tensor.random.op import RandomVariable
+from pytensor.tensor.variable import TensorVariable
 from scipy import stats
 
+from pymc_experimental.distributions.dist_math import (
+    pc_prior_studentt_kld_dist_inv_op,
+    pc_prior_studentt_logp,
+    studentt_kld_distance,
+)
+
 
 class GenExtremeRV(RandomVariable):
     name: str = "Generalized Extreme Value"
@@ -216,3 +226,62 @@ def moment(rv, size, mu, sigma, xi):
     if not rv_size_is_none(size):
         mode = pt.full(size, mode)
     return mode
+
+
+class PCPriorStudentT_dof_RV(RandomVariable):
+    name = "pc_prior_studentt_dof"
+    ndim_supp = 0
+    ndims_params = [0]
+    dtype = "floatX"
+    _print_name = ("PCTDoF", "\\operatorname{PCPriorStudentT_dof}")
+
+    @classmethod
+    def rng_fn(cls, rng, lam, size=None) -> np.ndarray:
+        return pc_prior_studentt_kld_dist_inv_op.spline(rng.exponential(scale=1.0 / lam, size=size))
+
+
+pc_prior_studentt_dof = PCPriorStudentT_dof_RV()
+
+
+class PCPriorStudentT_dof(PositiveContinuous):
+
+    rv_op = pc_prior_studentt_dof
+
+    @classmethod
+    def dist(
+        cls,
+        alpha: Optional[DIST_PARAMETER_TYPES] = None,
+        U: Optional[DIST_PARAMETER_TYPES] = None,
+        lam: Optional[DIST_PARAMETER_TYPES] = None,
+        *args,
+        **kwargs
+    ):
+        lam = cls.get_lam(alpha, U, lam)
+        return super().dist([lam], *args, **kwargs)
+
+    def moment(rv, size, lam):
+        mean = pc_prior_studentt_kld_dist_inv_op(1.0 / lam)
+        if not rv_size_is_none(size):
+            mean = pt.full(size, mean)
+        return mean
+
+    @classmethod
+    def get_lam(cls, alpha=None, U=None, lam=None):
+        if (alpha is not None) and (U is not None):
+            return -np.log(alpha) / studentt_kld_distance(U)
+        elif lam is not None:
+            return lam
+        else:
+            raise ValueError(
+                "Incompatible parameterization. Either use alpha and U, or lam to specify the "
+                "distribution."
+            )
+
+    def logp(value, lam):
+        res = pc_prior_studentt_logp(value, lam)
+        res = pt.switch(
+            pt.lt(value, 2 + 1e-6),  # 2 + 1e-6 is the smallest valid value for nu
+            -np.inf,
+            res,
+        )
+        return check_parameters(res, lam > 0, msg="lam > 0")
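
For orientation, a minimal usage sketch (not part of the diff), assuming this branch is installed and PCPriorStudentT_dof is importable from pymc_experimental.distributions; the toy data and likelihood are illustrative only:

# Usage sketch: PC prior on the Student t degrees of freedom.
# Assumes PCPriorStudentT_dof is exposed by pymc_experimental.distributions.
import numpy as np
import pymc as pm

from pymc_experimental.distributions import PCPriorStudentT_dof

rng = np.random.default_rng(0)
y = rng.standard_t(df=8, size=200)  # toy heavy-tailed data

with pm.Model():
    # Tail parameterization: lam = -log(alpha) / studentt_kld_distance(U),
    # so, roughly speaking, the prior puts probability alpha on nu below U.
    nu = PCPriorStudentT_dof("nu", alpha=0.1, U=10)
    sigma = pm.HalfNormal("sigma", 1.0)
    pm.StudentT("obs", nu=nu, mu=0.0, sigma=sigma, observed=y)
    idata = pm.sample()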
pymc_experimental/distributions/dist_math.py (new file)
@@ -0,0 +1,103 @@
# Copyright 2023 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# coding: utf-8


from typing import List, Tuple, Union

import numpy as np
import pytensor.tensor as pt
from pymc.distributions.dist_math import SplineWrapper, check_parameters
from pymc.distributions.distribution import Continuous
from pymc.distributions.shape_utils import rv_size_is_none
from pymc.pytensorf import floatX
from pytensor.tensor.random.op import RandomVariable
from pytensor.tensor.var import TensorVariable
from scipy import stats
from scipy.interpolate import UnivariateSpline


def studentt_kld_distance(nu):
    """
    2 * sqrt(KL divergence) between a Student t and a normal random variable.
    Derived by Tang in https://arxiv.org/abs/1811.08042.
    """
    return pt.sqrt(
        1 + pt.log(2 * pt.reciprocal(nu - 2))
        + 2 * pt.gammaln((nu + 1) / 2)
        - 2 * pt.gammaln(nu / 2)
        - (nu + 1) * (pt.digamma((nu + 1) / 2) - pt.digamma(nu / 2))
    )


def tri_gamma_approx(x):
    """
    Derivative of the digamma function, i.e. the second derivative of the
    log-gamma function. This is a series expansion taken from Wikipedia:
    https://en.wikipedia.org/wiki/Trigamma_function. Once the trigamma function
    in pytensor implements a gradient, this function can be removed and replaced.
    """
    return (
        1 / x
        + (1 / (2 * x**2))
        + (1 / (6 * x**3))
        - (1 / (30 * x**5))
        + (1 / (42 * x**7))
        - (1 / (30 * x**9))
        + (5 / (66 * x**11))
        - (691 / (2730 * x**13))
        + (7 / (6 * x**15))
    )

Review thread on tri_gamma_approx:
- Reviewer: it is already implemented.
- Reviewer: This approximation will be much more performant.
- Author: I saw you added trigamma recently, I'll give that a try. I used this approximation because at the time the gradient wasn't implemented yet, whereas the gradient for the approximation is easy. I wasn't concerned with performance at the time, but will take another look.
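
A quick way to gauge the truncated series (a verification sketch, not part of the PR) is to compare a NumPy mirror of it against SciPy's exact trigamma, scipy.special.polygamma(1, x); being an asymptotic expansion, it is accurate for moderate-to-large x but degrades near x = 1, i.e. near nu = 2:

# Verification sketch: compare the series with SciPy's exact trigamma.
import numpy as np
from scipy.special import polygamma

def tri_gamma_approx_np(x):
    # NumPy mirror of the pytensor expression above.
    return (
        1 / x
        + (1 / (2 * x**2))
        + (1 / (6 * x**3))
        - (1 / (30 * x**5))
        + (1 / (42 * x**7))
        - (1 / (30 * x**9))
        + (5 / (66 * x**11))
        - (691 / (2730 * x**13))
        + (7 / (6 * x**15))
    )

x = np.linspace(2.0, 50.0, 200)
print(np.abs(tri_gamma_approx_np(x) - polygamma(1, x)).max())  # small for x >= 2
print(abs(tri_gamma_approx_np(1.0) - polygamma(1, 1.0)))       # much worse at x = 1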

def pc_prior_studentt_logp(nu, lam):
    """
    The log probability density function of the PC prior for the degrees of
    freedom in a Student t likelihood. Derived by Tang in
    https://arxiv.org/abs/1811.08042.
    """
    return (
        pt.log(lam)
        + pt.log(
            (1 / (nu - 2))
            + ((nu + 1) / 2) * (tri_gamma_approx((nu + 1) / 2) - tri_gamma_approx(nu / 2))
        )
        - pt.log(4 * studentt_kld_distance(nu))
        - lam * studentt_kld_distance(nu)
        + pt.log(2)
    )
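
Since this density is the pushforward of an Exponential(lam) on the KLD distance, it should integrate to 1 over nu in (2, inf). A numerical check (my sketch, not part of the PR; the grid bounds are arbitrary):

# Verification sketch: the density should integrate to about 1 over nu > 2
# (up to truncation of the long right tail and the series approximation).
import numpy as np
import pytensor
import pytensor.tensor as pt

nu = pt.dvector("nu")
density = pytensor.function([nu], pt.exp(pc_prior_studentt_logp(nu, 0.5)))

grid = np.linspace(2.0 + 1e-4, 2000.0, 200_000)
print(np.trapz(density(grid), grid))  # expect close to 1 (np.trapezoid in NumPy 2.x)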

def _make_pct_inv_func():
    """
    Construct a numerical approximation to the inverse of the KLD distance
    function `studentt_kld_distance`. A spline is fit over degrees-of-freedom
    values from 2 + 1e-6 to 4000: 2 is the smallest valid value for the
    Student t degrees of freedom, and values above 4000 barely change the
    distance (the distribution is nearly Gaussian past 30). The result is
    wrapped in `SplineWrapper` so it can be used as a PyTensor op.
    """
    NU_MIN = 2.0 + 1e-6
    nu = np.concatenate(
        (
            np.linspace(NU_MIN, 2.4, 2000),
            np.linspace(2.4 + 1e-4, 4000, 10000),
        )
    )
    # studentt_kld_distance is decreasing in nu, so both arrays are reversed
    # to give the spline increasing x values.
    return UnivariateSpline(
        studentt_kld_distance(nu).eval()[::-1],
        nu[::-1],
        ext=3,
        k=3,
        s=0,
    )


pc_prior_studentt_kld_dist_inv_op = SplineWrapper(_make_pct_inv_func())
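
A round-trip check of the spline (a sketch, not part of the PR): map a few nu values to distances and back.

# Verification sketch: the spline should invert studentt_kld_distance
# on the fitted range [2 + 1e-6, 4000].
import numpy as np

inv = _make_pct_inv_func()
for nu_true in (2.5, 5.0, 30.0, 1000.0):
    d = studentt_kld_distance(nu_true).eval()
    print(nu_true, float(inv(d)))  # recovered nu should be close to nu_true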

def pc_negbinom_kld_distance_inv(alpha):
    """
    The inverse of the KLD distance for the PC prior on alpha when doing
    regression with overdispersion and a negative binomial likelihood. This is
    the inverse rather than the KLD distance itself because PyMC parameterizes
    alpha as 1 / alpha (higher alpha -> more Poisson, lower alpha -> more
    overdispersion).
    """
    return pt.sqrt(2.0 * (pt.log(1.0 / alpha) - pt.psi(1.0 / alpha)))
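
By analogy with PCPriorStudentT_dof.get_lam above, this distance could be used to set the Exponential rate from a tail condition. A hypothetical sketch (the function name and parameterization are my assumptions, not code from this PR):

# Hypothetical sketch: derive lam from a tail probability, mirroring
# get_lam in the Student t case. Names here are illustrative only.
import numpy as np

def negbinom_pc_lam(tail_prob, U):
    # lam such that the prior puts probability tail_prob beyond the
    # distance evaluated at U (in PyMC's alpha parameterization).
    d_U = pc_negbinom_kld_distance_inv(U).eval()
    return -np.log(tail_prob) / d_U

print(negbinom_pc_lam(0.1, 10.0))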

Review thread:
- Reviewer: needs a docstring.
- Reply: Usually we don't document the RV, but the Distribution class, which doesn't have a docstring either.