PC prior distribution for Student T dof #252

Open
wants to merge 10 commits into main
3 changes: 2 additions & 1 deletion pymc_experimental/distributions/__init__.py
@@ -17,7 +17,7 @@
Experimental probability distributions for stochastic nodes in PyMC.
"""

from pymc_experimental.distributions.continuous import GenExtreme
from pymc_experimental.distributions.continuous import GenExtreme, PCPriorStudentT_dof
from pymc_experimental.distributions.discrete import GeneralizedPoisson
from pymc_experimental.distributions.histogram_utils import histogram_approximation
from pymc_experimental.distributions.multivariate import R2D2M2CP
@@ -27,6 +27,7 @@
"DiscreteMarkovChain",
"GeneralizedPoisson",
"GenExtreme",
"PCPriorStudentT_dof",
"R2D2M2CP",
"histogram_approximation",
]
75 changes: 72 additions & 3 deletions pymc_experimental/distributions/continuous.py
@@ -19,18 +19,28 @@
The imports from pymc are not fully replicated here: add imports as necessary.
"""

from typing import List, Tuple, Union
from typing import List, Optional, Tuple, Union

import numpy as np
import pytensor.tensor as pt
from pymc.distributions.dist_math import check_parameters
from pymc.distributions.continuous import (
DIST_PARAMETER_TYPES,
PositiveContinuous,
check_parameters,
)
from pymc.distributions.distribution import Continuous
from pymc.distributions.shape_utils import rv_size_is_none
from pymc.pytensorf import floatX
from pytensor.tensor import TensorVariable
from pytensor.tensor.random.op import RandomVariable
from pytensor.tensor.variable import TensorVariable
from scipy import stats

from pymc_experimental.distributions.dist_math import (
pc_prior_studentt_kld_dist_inv_op,
pc_prior_studentt_logp,
studentt_kld_distance,
)


class GenExtremeRV(RandomVariable):
name: str = "Generalized Extreme Value"
@@ -216,3 +226,62 @@ def moment(rv, size, mu, sigma, xi):
if not rv_size_is_none(size):
mode = pt.full(size, mode)
return mode


class PCPriorStudentT_dof_RV(RandomVariable):
Member: needs a docstring

Member: Usually we don't document the RV, but the Distribution class, which doesn't have a docstring either.

name = "pc_prior_studentt_dof"
ndim_supp = 0
ndims_params = [0]
dtype = "floatX"
_print_name = ("PCTDoF", "\\operatorname{PCPriorStudentT_dof}")

@classmethod
def rng_fn(cls, rng, lam, size=None) -> np.ndarray:
return pc_prior_studentt_kld_dist_inv_op.spline(rng.exponential(scale=1.0 / lam, size=size))


pc_prior_studentt_dof = PCPriorStudentT_dof_RV()
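
The sampling in rng_fn is inversion on the distance scale: the PC prior places an Exponential(lam) prior on the KLD distance d(nu), and a draw of nu maps an exponential draw back through the spline approximation of the inverse. A minimal sketch of the same two-step draw outside the PR code (assuming the dist_math module from this PR is importable; the numbers are only illustrative):

import numpy as np

from pymc_experimental.distributions.dist_math import pc_prior_studentt_kld_dist_inv_op

rng = np.random.default_rng(0)
lam = 0.3
distances = rng.exponential(scale=1.0 / lam, size=5)            # d ~ Exponential(lam)
nu_draws = pc_prior_studentt_kld_dist_inv_op.spline(distances)  # map distances back to degrees of freedom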


class PCPriorStudentT_dof(PositiveContinuous):

rv_op = pc_prior_studentt_dof

@classmethod
def dist(
cls,
alpha: Optional[DIST_PARAMETER_TYPES] = None,
U: Optional[DIST_PARAMETER_TYPES] = None,
lam: Optional[DIST_PARAMETER_TYPES] = None,
*args,
**kwargs
):
lam = cls.get_lam(alpha, U, lam)
return super().dist([lam], *args, **kwargs)

def moment(rv, size, lam):
mean = pc_prior_studentt_kld_dist_inv_op(1.0 / lam)
if not rv_size_is_none(size):
mean = pt.full(size, mean)
return mean

@classmethod
def get_lam(cls, alpha=None, U=None, lam=None):
if (alpha is not None) and (U is not None):
return -np.log(alpha) / studentt_kld_distance(U)
Member (suggested change): use pt.log instead of np.log:

return -pt.log(alpha) / studentt_kld_distance(U)

elif lam is not None:
return lam
else:
raise ValueError(
"Incompatible parameterization. Either use alpha and U, or lam to specify the "
"distribution."
)

def logp(value, lam):
res = pc_prior_studentt_logp(value, lam)
res = pt.switch(
pt.lt(value, 2 + 1e-6), # 2 + 1e-6 smallest value for nu
-np.inf,
res,
)
return check_parameters(res, lam > 0, msg="lam > 0")
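
A brief sketch of how the alpha/U parameterization maps to lam (hedged, not part of the PR; the numbers are illustrative). lam is chosen so that the tail condition P(d(nu) > d(U)) = alpha holds under the Exponential(lam) prior on the distance:

import numpy as np
import pymc as pm

from pymc_experimental.distributions import PCPriorStudentT_dof
from pymc_experimental.distributions.dist_math import studentt_kld_distance

alpha, U = 0.5, 30.0
d_U = studentt_kld_distance(U).eval()   # distance at the threshold U; inputs are constant, so eval is fine
lam = -np.log(alpha) / d_U              # same formula as get_lam
print(np.exp(-lam * d_U))               # recovers alpha = 0.5

# Prior draws of nu under the same parameterization:
nu = PCPriorStudentT_dof.dist(U=U, alpha=alpha)
print(pm.draw(nu, draws=5))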
90 changes: 90 additions & 0 deletions pymc_experimental/distributions/dist_math.py
@@ -0,0 +1,90 @@
# Copyright 2023 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# coding: utf-8

import numpy as np
import pytensor.tensor as pt
from pymc.distributions.dist_math import SplineWrapper
from scipy.interpolate import UnivariateSpline


def studentt_kld_distance(nu):
"""
2 * sqrt(KL divergence) between a Student's t and a normal random variable. Derived
by Tang in https://arxiv.org/abs/1811.08042.
"""
return pt.sqrt(
1
+ pt.log(2 * pt.reciprocal(nu - 2))
+ 2 * pt.gammaln((nu + 1) / 2)
- 2 * pt.gammaln(nu / 2)
- (nu + 1) * (pt.digamma((nu + 1) / 2) - pt.digamma(nu / 2))
)
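
As a quick illustration of the base-model shrinkage behind the prior (a sketch, not part of the PR): the distance decreases toward zero as nu grows, reflecting that the Student's t collapses to the normal base model.

for nu_val in (3.0, 10.0, 100.0, 1000.0):
    # inputs are plain floats, so evaluating the constant graph is fine here
    print(nu_val, float(studentt_kld_distance(nu_val).eval()))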


def tri_gamma_approx(x):
Member: it is already implemented

Member: This approximation will be much more performant

Contributor Author: I saw you added trigamma recently, I'll give that a try. I used this approx because at the time the gradient wasn't implemented yet, whereas the gradient for the approx is easy. I wasn't concerned with performance at the time, but will take another look.

"""Derivative of the digamma function, or second derivative of the gamma function. This is a
series expansion taken from wikipedia: https://en.wikipedia.org/wiki/Trigamma_function. When
the trigamma function in pytensor implements a gradient this function can be removed and
replaced.
"""
return (
1 / x
+ (1 / (2 * x**2))
+ (1 / (6 * x**3))
- (1 / (30 * x**5))
+ (1 / (42 * x**7))
- (1 / (30 * x**9))
+ (5 / (66 * x**11))
- (691 / (2730 * x**13))
+ (7 / (6 * x**15))
)
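
A minimal sanity check of the series against SciPy's exact trigamma (illustrative only, assuming tri_gamma_approx and np from this module are in scope; the expansion is asymptotic, so accuracy degrades near the nu = 2 boundary and the check uses larger arguments):

from scipy import special

x = np.linspace(5.0, 50.0, 10)
np.testing.assert_allclose(tri_gamma_approx(x), special.polygamma(1, x), rtol=1e-6)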


def pc_prior_studentt_logp(nu, lam):
"""The log probability density function for the PC prior for the degrees of freedom in a
student t likelihood. Derived by Tang in https://arxiv.org/abs/1811.08042.
"""
return (
pt.log(lam)
+ pt.log(
(1 / (nu - 2))
+ ((nu + 1) / 2) * (tri_gamma_approx((nu + 1) / 2) - tri_gamma_approx(nu / 2))
)
- pt.log(4 * studentt_kld_distance(nu))
- lam * studentt_kld_distance(nu)
+ pt.log(2)
)
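
One way to read this density (a hedged spot-check, not part of the PR): it is the change of variables of the Exponential(lam) density on the distance, logp(nu) = log(lam) - lam * d(nu) + log|d'(nu)|, with the Jacobian approximated here via a finite difference:

lam_val, nu_val, eps = 0.5, 6.0, 1e-4
d = lambda v: float(studentt_kld_distance(v).eval())
jac = abs(d(nu_val + eps) - d(nu_val - eps)) / (2 * eps)
by_hand = np.log(lam_val) - lam_val * d(nu_val) + np.log(jac)
print(by_hand, float(pc_prior_studentt_logp(nu_val, lam_val).eval()))  # the two should agree closely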


def _make_pct_inv_func():
"""This function constructs a numerical approximation to the inverse of the KLD distance
function, `studentt_kld_distance`. It does a spline fit for degrees of freedom values
from 2 + 1e-6 to 4000. 2 is the smallest valid value for the student t degrees of freedom, and
values above 4000 don't seem to change much (nearly Gaussian past 30). It's then wrapped by
`SplineWrapper` so it can be used as a PyTensor op.
"""
NU_MIN = 2.0 + 1e-6
nu = np.concatenate((np.linspace(NU_MIN, 2.4, 2000), np.linspace(2.4 + 1e-4, 4000, 10000)))
return UnivariateSpline(
studentt_kld_distance(nu).eval()[::-1],
Member: Having an eval is a bit dangerous. If it comes up from an RV you're going to get a random value. The safe thing to do is to constant_fold and raise if it can't be done.

Or create a PyTensor Op that wraps UnivariateSpline

Member: don't we have such an op?

Contributor Author (@bwengals, Dec 8, 2023): @ricardoV94 It only comes from nu, which is passed in above as a fixed numpy array, so I think eval is safe here (unless I'm missing your point). I'm using this to get a spline approximation to the inverse of this function, which is what the [::-1] bit at the end of the inputs is about.

Thanks @ferrine, will look into that. I remember needing to use UnivariateSpline this way because I needed this particular behavior

if ext=3 of ‘const’, return the boundary value.

as nu goes to infinity.

Member: Ah I missed the inputs were constant, nvm on my end

Member (@jessegrabowski, Dec 19, 2023): If it's always a known constant could you use .data instead of .eval()?

nu[::-1],
ext=3,
k=3,
s=0,
)


pc_prior_studentt_kld_dist_inv_op = SplineWrapper(_make_pct_inv_func())
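
Regarding the eval discussion above, a minimal sketch of the constant_fold alternative the reviewer mentions (assuming pymc.pytensorf.constant_fold is available in the pinned PyMC version; this is not what the PR currently does):

from pymc.pytensorf import constant_fold

NU_MIN = 2.0 + 1e-6
nu = np.concatenate((np.linspace(NU_MIN, 2.4, 2000), np.linspace(2.4 + 1e-4, 4000, 10000)))
# Folds the constant graph down to a numpy array, and raises instead of silently
# evaluating if the graph turns out not to be constant (e.g. if it contained an RV).
(dist_vals,) = constant_fold([studentt_kld_distance(nu)], raise_not_constant=True)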
3 changes: 2 additions & 1 deletion pymc_experimental/gp/latent_approx.py
@@ -17,7 +17,8 @@
import pymc as pm
import pytensor.tensor as pt
from pymc.gp.util import JITTER_DEFAULT, stabilize
from pytensor.tensor.linalg import cholesky, solve_triangular
from pytensor.tensor.linalg import cholesky
from pytensor.tensor.slinalg import solve_triangular

solve_lower = partial(solve_triangular, lower=True)
solve_upper = partial(solve_triangular, lower=False)
28 changes: 27 additions & 1 deletion pymc_experimental/tests/distributions/test_continuous.py
@@ -14,6 +14,7 @@
import platform

import numpy as np
import numpy.testing as npt
import pymc as pm

# general imports
@@ -35,7 +36,32 @@
)

# the distributions to be tested
from pymc_experimental.distributions import GenExtreme
from pymc_experimental.distributions import GenExtreme, PCPriorStudentT_dof


class TestPCPriorStudentT_dof:
"""The test compares the result to what's implemented in INLA. Since it's a specialized
distribution the user shouldn't ever draw random samples from it, calculate the logcdf, or
any of that. The log-probability won't match up exactly to INLA. INLA uses a numeric
approximation and this implementation uses an exact solution for the log-probability. Some
numerical approximations are needed for drawing random samples though.
"""

@pytest.mark.parametrize(
"test_case",
[
{"U": 30, "alpha": 0.5, "dof": 5, "inla_result": -4.792407},
{"U": 30, "alpha": 0.5, "dof": 500, "inla_result": -9.464625},
{"U": 30, "alpha": 0.5, "dof": 1, "inla_result": -np.inf}, # actually INLA throws error
{"U": 30, "alpha": 0.1, "dof": 5, "inla_result": -15.25691},
{"U": 30, "alpha": 0.9, "dof": 5, "inla_result": -2.416043},
{"U": 5, "alpha": 0.99, "dof": 5, "inla_result": -5.992945},
{"U": 5, "alpha": 0.01, "dof": 5, "inla_result": -4.460736},
],
)
def test_logp(self, test_case):
d = PCPriorStudentT_dof.dist(U=test_case["U"], alpha=test_case["alpha"])
npt.assert_allclose(pm.logp(d, test_case["dof"]).eval(), test_case["inla_result"], rtol=0.1)


class TestGenExtremeClass: