diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py
index f06db969..e3ad9d8d 100644
--- a/pymc_experimental/distributions/__init__.py
+++ b/pymc_experimental/distributions/__init__.py
@@ -17,7 +17,7 @@
 Experimental probability distributions for stochastic nodes in PyMC.
 """
 
-from pymc_experimental.distributions.continuous import Chi, GenExtreme, Maxwell
+from pymc_experimental.distributions.continuous import Chi, GenExtreme, Maxwell, GeneralizedNormal
 from pymc_experimental.distributions.discrete import (
     BetaNegativeBinomial,
     GeneralizedPoisson,
@@ -37,4 +37,5 @@
     "histogram_approximation",
     "Chi",
     "Maxwell",
+    "GeneralizedNormal",
 ]
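With the export above in place, `GeneralizedNormal` can be imported straight from `pymc_experimental.distributions`. A minimal usage sketch, assuming the diff below is applied (the synthetic data and priors here are illustrative, not part of the patch):

```python
import numpy as np
import pymc as pm

from pymc_experimental.distributions import GeneralizedNormal

# Synthetic data for illustration only
data = np.random.default_rng(0).normal(loc=0.0, scale=1.0, size=200)

with pm.Model():
    mu = pm.Normal("mu", 0.0, 1.0)
    alpha = pm.HalfNormal("alpha", 1.0)  # scale: note Var = alpha**2 / 2 when beta = 2
    beta = pm.HalfNormal("beta", 2.0)  # shape: beta = 2 recovers the normal distribution
    GeneralizedNormal("obs", mu=mu, alpha=alpha, beta=beta, observed=data)
    idata = pm.sample()
```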
diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py
index dcf9b775..70eefc71 100644
--- a/pymc_experimental/distributions/continuous.py
+++ b/pymc_experimental/distributions/continuous.py
@@ -351,3 +351,170 @@ def dist(cls, a, **kwargs):
             class_name="Maxwell",
             **kwargs,
         )
+
+
+class GeneralizedNormalRV(RandomVariable):
+    name: str = "Generalized Normal"
+    ndim_supp: int = 0
+    ndims_params: List[int] = [0, 0, 0]
+    dtype: str = "floatX"
+    _print_name: Tuple[str, str] = ("Generalized Normal", "\\operatorname{GenNorm}")
+
+    def __call__(self, loc=0.0, alpha=1.0, beta=2.0, **kwargs) -> TensorVariable:
+        return super().__call__(loc, alpha, beta, **kwargs)
+
+    @classmethod
+    def rng_fn(
+        cls,
+        rng: np.random.RandomState,
+        loc: np.ndarray,
+        alpha: np.ndarray,
+        beta: np.ndarray,
+        size: Tuple[int, ...],
+    ) -> np.ndarray:
+        return stats.gennorm.rvs(beta, loc=loc, scale=alpha, size=size, random_state=rng)
+
+
+# Create the actual `RandomVariable` `Op`...
+gennorm = GeneralizedNormalRV()
+
+
+class GeneralizedNormal(Continuous):
+    r"""
+    Univariate Generalized Normal distribution.
+
+    The cdf of this distribution is
+
+    .. math::
+
+       G(x \mid \mu, \alpha, \beta) = \frac{1}{2} +
+           \mathrm{sign}(x-\mu) \frac{1}{2\Gamma(1/\beta)}
+           \gamma\left(1/\beta, \left|\frac{x-\mu}{\alpha}\right|^\beta\right)
+
+    where :math:`\gamma()` is the unnormalised lower incomplete gamma function.
+
+    The support of the distribution is the whole real line, while the
+    parameters are restricted to
+
+    .. math::
+
+       \alpha, \beta > 0
+
+    This is the same parametrization used in SciPy, and it also follows
+    `Wikipedia <https://en.wikipedia.org/wiki/Generalized_normal_distribution>`_.
+
+    The :math:`\beta` parameter controls the kurtosis of the distribution,
+    with the excess kurtosis given by
+
+    .. math::
+
+       \mathrm{exKurt}(\beta) = \frac{\Gamma(5/\beta) \Gamma(1/\beta)}{\Gamma(3/\beta)^2} - 3
+
+    .. plot::
+        :context: close-figs
+
+        import matplotlib.pyplot as plt
+        import numpy as np
+        import scipy.stats as stats
+        from cycler import cycler
+
+        plt.style.use('arviz-darkgrid')
+        x = np.linspace(-3, 3, 200)
+
+        betas = [0.5, 2, 8]
+        beta_cycle = cycler(color=['r', 'g', 'b'])
+        alphas = [1.0, 2.0]
+        alpha_cycle = cycler(ls=['-', '--'])
+        style_cycler = beta_cycle * alpha_cycle
+        istyler = iter(style_cycler)
+
+        for beta in betas:
+            for alpha in alphas:
+                pdf = stats.gennorm.pdf(x, beta, loc=0.0, scale=alpha)
+                args = next(istyler)
+                plt.plot(x, pdf, label=r'$\alpha=$ {0} $\beta=$ {1}'.format(alpha, beta), **args)
+        plt.xlabel('x', fontsize=12)
+        plt.ylabel('f(x)', fontsize=12)
+        plt.legend(loc=1)
+        plt.show()
+
+    ========  =========================================================
+    Support   :math:`x \in (-\infty, \infty)`
+    Mean      :math:`\mu`
+    Variance  :math:`\frac{\alpha^2 \Gamma(3/\beta)}{\Gamma(1/\beta)}`
+    ========  =========================================================
+
+    Parameters
+    ----------
+    mu : float
+        Location parameter.
+    alpha : float
+        Scale parameter (alpha > 0). Note that the variance of the
+        distribution is not alpha squared; in particular, when beta=2
+        (normal distribution) the variance is :math:`\alpha^2/2`.
+    beta : float
+        Shape parameter (beta > 0). This is directly related to the
+        kurtosis, which is a monotonically decreasing function of beta.
+    """
+
+    rv_op = gennorm
+
+    @classmethod
+    def dist(cls, mu, alpha, beta, **kwargs) -> RandomVariable:
+        mu = pt.as_tensor_variable(floatX(mu))
+        alpha = pt.as_tensor_variable(floatX(alpha))
+        beta = pt.as_tensor_variable(floatX(beta))
+
+        return super().dist([mu, alpha, beta], **kwargs)
+
+    # The moment of this distribution is its mean, mu, broadcast against
+    # the other parameters and the requested size
+    def moment(rv, size, mu, alpha, beta):
+        moment, *_ = pt.broadcast_arrays(mu, alpha, beta)
+        if not rv_size_is_none(size):
+            moment = pt.full(size, moment)
+        return moment
+
+    def logp(value, mu, alpha, beta):
+        z = (value - mu) / alpha
+        lp = pt.log(0.5 * beta) - pt.log(alpha) - pt.gammaln(1.0 / beta) - pt.pow(pt.abs(z), beta)
+
+        return check_parameters(
+            lp,
+            alpha > 0,
+            beta > 0,
+            msg="alpha > 0, beta > 0",
+        )
+
+    def logcdf(value, mu, alpha, beta):
+        """
+        Compute the log of the cumulative distribution function for a
+        Generalized Normal distribution.
+
+        Parameters
+        ----------
+        value : numeric or np.ndarray or `TensorVariable`
+            Value(s) for which the log CDF is calculated
+        mu : numeric
+            Location parameter
+        alpha : numeric
+            Scale parameter
+        beta : numeric
+            Shape parameter
+        """
+        z = pt.abs((value - mu) / alpha)
+
+        # This follows the Wikipedia entry for the function - scipy has
+        # opted for a slightly different version using gammaincc instead.
+        # Note that on the Wikipedia page the unnormalised \gamma function
+        # is used, while pt.gammainc is the normalised (regularized) form,
+        # so no extra division by Gamma(1/beta) is needed here.
+        cdf = 0.5 + 0.5 * pt.sign(value - mu) * pt.gammainc(1.0 / beta, pt.pow(z, beta))
+
+        return check_parameters(
+            pt.log(cdf),
+            alpha > 0,
+            beta > 0,
+            msg="alpha > 0, beta > 0",
+        )
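As a quick cross-check of the `logp` and `logcdf` implementations above against SciPy's `gennorm`, which uses the same parametrization, something like the following sketch should pass (parameter values arbitrary, not part of the patch):

```python
import numpy as np
import pymc as pm
import scipy.stats as st

from pymc_experimental.distributions import GeneralizedNormal

x = GeneralizedNormal.dist(mu=0.5, alpha=2.0, beta=4.0)
vals = np.linspace(-3.0, 3.0, 7)

# scipy's gennorm takes beta as its first argument, with loc/scale playing
# the roles of mu/alpha
np.testing.assert_allclose(
    pm.logp(x, vals).eval(), st.gennorm.logpdf(vals, 4.0, loc=0.5, scale=2.0), rtol=1e-6
)
np.testing.assert_allclose(
    pm.logcdf(x, vals).eval(), st.gennorm.logcdf(vals, 4.0, loc=0.5, scale=2.0), rtol=1e-6
)
```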
diff --git a/pymc_experimental/tests/distributions/test_continuous.py b/pymc_experimental/tests/distributions/test_continuous.py
index 5df4aef1..5f2dbc72 100644
--- a/pymc_experimental/tests/distributions/test_continuous.py
+++ b/pymc_experimental/tests/distributions/test_continuous.py
@@ -33,7 +33,7 @@
 )
 
 # the distributions to be tested
-from pymc_experimental.distributions import Chi, GenExtreme, Maxwell
+from pymc_experimental.distributions import Chi, GenExtreme, Maxwell, GeneralizedNormal
 
 
 class TestGenExtremeClass:
@@ -182,3 +182,46 @@ def test_logcdf(self):
             {"a": Rplus},
             lambda value, a: sp.maxwell.logcdf(value, scale=a),
         )
+
+
+class TestGeneralizedNormalClass:
+    """
+    Wrapper class so that tests of experimental additions can be dropped into
+    PyMC directly on adoption.
+    """
+
+    def test_logp(self):
+        check_logp(
+            pymc_dist=GeneralizedNormal,
+            domain=R,
+            paramdomains={"mu": R, "alpha": Rplus, "beta": Rplus},
+            scipy_logp=lambda value, mu, alpha, beta: sp.gennorm.logpdf(
+                value, beta, loc=mu, scale=alpha
+            ),
+        )
+
+    def test_logcdf(self):
+        check_logcdf(
+            pymc_dist=GeneralizedNormal,
+            domain=R,
+            paramdomains={"mu": R, "alpha": Rplus, "beta": Rplus},
+            scipy_logcdf=lambda value, mu, alpha, beta: sp.gennorm.logcdf(
+                value, beta, loc=mu, scale=alpha
+            ),
+        )
+
+    # Illustrative cases (assumed here; the original patch left the
+    # parametrization out): the moment is mu broadcast to size.
+    @pytest.mark.parametrize(
+        "mu, alpha, beta, size, expected",
+        [
+            (0, 1, 2, None, 0),
+            (1, 1, 2, 5, np.full(5, 1)),
+            (np.arange(5), 1.0, 2.0, None, np.arange(5)),
+        ],
+    )
+    def test_gennorm_moment(self, mu, alpha, beta, size, expected):
+        with pm.Model() as model:
+            GeneralizedNormal("x", mu=mu, alpha=alpha, beta=beta, size=size)
+        assert_moment_is_expected(model, expected)
+
+
+class TestGeneralizedNormal(BaseTestDistributionRandom):
+    pymc_dist = GeneralizedNormal
+    pymc_dist_params = {"mu": 0, "alpha": 1, "beta": 2.0}
+    expected_rv_op_params = {"mu": 0, "alpha": 1, "beta": 2.0}
+    reference_dist_params = {"loc": 0, "scale": 1, "beta": 2.0}
+    reference_dist = seeded_scipy_distribution_builder("gennorm")
+    checks_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
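Finally, the scale convention flagged in the docstring (alpha is not the standard deviation) can be spot-checked by drawing from the distribution directly, as in this sketch with an arbitrary seed and sample size:

```python
import pymc as pm

from pymc_experimental.distributions import GeneralizedNormal

# At beta = 2 the distribution reduces to a normal with variance alpha**2 / 2,
# so alpha = 2.0 should give an empirical variance close to 2.0
draws = pm.draw(GeneralizedNormal.dist(mu=0.0, alpha=2.0, beta=2.0), draws=200_000, random_seed=1)
print(draws.var())  # ~2.0
```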