Add Genextreme distribution (#84)

ccaprani · ricardoV94 · web-flow · commit 78b7f2f315d9 · 2022-09-28T20:51:21.000+02:00
Co-authored-by: Ricardo Vieira &lt;ricardo.vieira1994@gmail.com&gt;
diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py
@@ -1,2 +1,24 @@
-from pymc_experimental.distributions import histogram_utils
-from pymc_experimental.distributions.histogram_utils import histogram_approximation
+#   Copyright 2022 The PyMC Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# coding: utf-8
+"""
+Experimental probability distributions for stochastic nodes in PyMC.
+"""
+
+from pymc_experimental.distributions.continuous import GenExtreme
+
+__all__ = [
+    "GenExtreme",
+]
diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py
@@ -0,0 +1,218 @@
+#   Copyright 2022 The PyMC Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# coding: utf-8
+"""
+Experimental probability distributions for stochastic nodes in PyMC.
+
+The imports from pymc are not fully replicated here: add imports as necessary.
+"""
+
+from typing import List, Tuple, Union
+
+import aesara.tensor as at
+import numpy as np
+from aesara.tensor.random.op import RandomVariable
+from aesara.tensor.var import TensorVariable
+from pymc.aesaraf import floatX
+from pymc.distributions.dist_math import check_parameters
+from pymc.distributions.distribution import Continuous
+from pymc.distributions.shape_utils import rv_size_is_none
+from scipy import stats
+
+
+class GenExtremeRV(RandomVariable):
+    name: str = "Generalized Extreme Value"
+    ndim_supp: int = 0
+    ndims_params: List[int] = [0, 0, 0]
+    dtype: str = "floatX"
+    _print_name: Tuple[str, str] = ("Generalized Extreme Value", "\\operatorname{GEV}")
+
+    def __call__(self, mu=0.0, sigma=1.0, xi=0.0, size=None, **kwargs) -> TensorVariable:
+        return super().__call__(mu, sigma, xi, size=size, **kwargs)
+
+    @classmethod
+    def rng_fn(
+        cls,
+        rng: Union[np.random.RandomState, np.random.Generator],
+        mu: np.ndarray,
+        sigma: np.ndarray,
+        xi: np.ndarray,
+        size: Tuple[int, ...],
+    ) -> np.ndarray:
+        # Notice negative here, since remainder of GenExtreme is based on Coles parametrization
+        return stats.genextreme.rvs(c=-xi, loc=mu, scale=sigma, random_state=rng, size=size)
+
+
+gev = GenExtremeRV()
+
+
+class GenExtreme(Continuous):
+    r"""
+        Univariate Generalized Extreme Value log-likelihood
+
+    The cdf of this distribution is
+
+    .. math::
+
+       G(x \mid \mu, \sigma, \xi) = \exp\left[ -\left(1 + \xi z\right)^{-\frac{1}{\xi}} \right]
+
+    where
+
+    .. math::
+
+        z = \frac{x - \mu}{\sigma}
+
+    and is defined on the set:
+
+    .. math::
+
+        \left\{x: 1 + \xi\left(\frac{x-\mu}{\sigma}\right) > 0 \right\}.
+
+    Note that this parametrization is per Coles (2001), and differs from that of
+    Scipy in the sign of the shape parameter, :math:`\xi`.
+
+    .. plot::
+
+        import matplotlib.pyplot as plt
+        import numpy as np
+        import scipy.stats as st
+        import arviz as az
+        plt.style.use('arviz-darkgrid')
+        x = np.linspace(-10, 20, 200)
+        mus = [0., 4., -1.]
+        sigmas = [2., 2., 4.]
+        xis = [-0.3, 0.0, 0.3]
+        for mu, sigma, xi in zip(mus, sigmas, xis):
+            pdf = st.genextreme.pdf(x, c=-xi, loc=mu, scale=sigma)
+            plt.plot(x, pdf, label=rf'$\mu$ = {mu}, $\sigma$ = {sigma}, $\xi$={xi}')
+        plt.xlabel('x', fontsize=12)
+        plt.ylabel('f(x)', fontsize=12)
+        plt.legend(loc=1)
+        plt.show()
+
+
+    ========  =========================================================================
+    Support   * :math:`x \in [\mu - \sigma/\xi, +\infty]`, when :math:`\xi > 0`
+              * :math:`x \in \mathbb{R}` when :math:`\xi = 0`
+              * :math:`x \in [-\infty, \mu - \sigma/\xi]`, when :math:`\xi < 0`
+    Mean      * :math:`\mu + \sigma(g_1 - 1)/\xi`, when :math:`\xi \neq 0, \xi < 1`
+              * :math:`\mu + \sigma \gamma`, when :math:`\xi = 0`
+              * :math:`\infty`, when :math:`\xi \geq 1`
+                where :math:`\gamma` is the Euler-Mascheroni constant, and
+                :math:`g_k = \Gamma (1-k\xi)`
+    Variance  * :math:`\sigma^2 (g_2 - g_1^2)/\xi^2`, when :math:`\xi \neq 0, \xi < 0.5`
+              * :math:`\frac{\pi^2}{6} \sigma^2`, when :math:`\xi = 0`
+              * :math:`\infty`, when :math:`\xi \geq 0.5`
+    ========  =========================================================================
+
+    Parameters
+    ----------
+    mu: float
+        Location parameter.
+    sigma: float
+        Scale parameter (sigma > 0).
+    xi: float
+        Shape parameter
+    scipy: bool
+        Whether or not to use the Scipy interpretation of the shape parameter
+        (defaults to `False`).
+
+    References
+    ----------
+    .. [Coles2001] Coles, S.G. (2001).
+        An Introduction to the Statistical Modeling of Extreme Values
+        Springer-Verlag, London
+
+    """
+
+    rv_op = gev
+
+    @classmethod
+    def dist(cls, mu=0, sigma=1, xi=0, scipy=False, **kwargs):
+        # If SciPy, use its parametrization, otherwise convert to standard
+        if scipy:
+            xi = -xi
+        mu = at.as_tensor_variable(floatX(mu))
+        sigma = at.as_tensor_variable(floatX(sigma))
+        xi = at.as_tensor_variable(floatX(xi))
+
+        return super().dist([mu, sigma, xi], **kwargs)
+
+    def logp(value, mu, sigma, xi):
+        """
+        Calculate log-probability of Generalized Extreme Value distribution
+        at specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log-probability is calculated. If the log probabilities for multiple
+            values are desired the values must be provided in a numpy array or Aesara tensor
+
+        Returns
+        -------
+        TensorVariable
+        """
+        scaled = (value - mu) / sigma
+
+        logp_expression = at.switch(
+            at.isclose(xi, 0),
+            -at.log(sigma) - scaled - at.exp(-scaled),
+            -at.log(sigma)
+            - ((xi + 1) / xi) * at.log1p(xi * scaled)
+            - at.pow(1 + xi * scaled, -1 / xi),
+        )
+
+        logp = at.switch(at.gt(1 + xi * scaled, 0.0), logp_expression, -np.inf)
+
+        return check_parameters(
+            logp, sigma > 0, at.and_(xi > -1, xi < 1), msg="sigma > 0 or -1 < xi < 1"
+        )
+
+    def logcdf(value, mu, sigma, xi):
+        """
+        Compute the log of the cumulative distribution function for Generalized Extreme Value
+        distribution at the specified value.
+
+        Parameters
+        ----------
+        value: numeric or np.ndarray or `TensorVariable`
+            Value(s) for which log CDF is calculated. If the log CDF for
+            multiple values are desired the values must be provided in a numpy
+            array or `TensorVariable`.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        scaled = (value - mu) / sigma
+        logc_expression = at.switch(
+            at.isclose(xi, 0), -at.exp(-scaled), -at.pow(1 + xi * scaled, -1 / xi)
+        )
+
+        logc = at.switch(1 + xi * (value - mu) / sigma > 0, logc_expression, -np.inf)
+
+        return check_parameters(
+            logc, sigma > 0, at.and_(xi > -1, xi < 1), msg="sigma > 0 or -1 < xi < 1"
+        )
+
+    def moment(rv, size, mu, sigma, xi):
+        r"""
+        Using the mode, as the mean can be infinite when :math:`\xi > 1`
+        """
+        mode = at.switch(at.isclose(xi, 0), mu, mu + sigma * (at.pow(1 + xi, -xi) - 1) / xi)
+        if not rv_size_is_none(size):
+            mode = at.full(size, mode)
+        return mode
diff --git a/pymc_experimental/tests/distributions/__init__.py b/pymc_experimental/tests/distributions/__init__.py
@@ -0,0 +1,2 @@
+from pymc_experimental.distributions import histogram_utils
+from pymc_experimental.distributions.histogram_utils import histogram_approximation
diff --git a/pymc_experimental/tests/distributions/test_continuous.py b/pymc_experimental/tests/distributions/test_continuous.py
@@ -0,0 +1,137 @@
+#   Copyright 2020 The PyMC Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# general imports
+import aesara
+import numpy as np
+import pymc as pm
+import pytest
+import scipy.stats.distributions as sp
+
+# test support imports from pymc
+from pymc.tests.distributions.util import (
+    BaseTestDistributionRandom,
+    Domain,
+    R,
+    Rplusbig,
+    assert_moment_is_expected,
+    check_logcdf,
+    check_logp,
+    seeded_scipy_distribution_builder,
+)
+from pymc.tests.helpers import select_by_precision
+
+# the distributions to be tested
+from pymc_experimental.distributions import GenExtreme
+
+
+class TestGenExtremeClass:
+    """
+    Wrapper class so that tests of experimental additions can be dropped into
+    PyMC directly on adoption.
+
+    pm.logp(GenExtreme.dist(mu=0.,sigma=1.,xi=0.5),value=-0.01)
+    """
+
+    @pytest.mark.xfail(
+        condition=(aesara.config.floatX == "float32"),
+        reason="PyMC underflows earlier than scipy on float32",
+    )
+    def test_logp(self):
+        check_logp(
+            GenExtreme,
+            R,
+            {
+                "mu": R,
+                "sigma": Rplusbig,
+                "xi": Domain([-1, -0.99, -0.5, 0, 0.5, 0.99, 1]),
+            },
+            lambda value, mu, sigma, xi: sp.genextreme.logpdf(value, c=-xi, loc=mu, scale=sigma)
+            if 1 + xi * (value - mu) / sigma > 0
+            else -np.inf,
+        )
+
+        if aesara.config.floatX == "float32":
+            raise Exception("Flaky test: It passed this time, but XPASS is not allowed.")
+
+    def test_logcdf(self):
+        check_logcdf(
+            GenExtreme,
+            R,
+            {
+                "mu": R,
+                "sigma": Rplusbig,
+                "xi": Domain([-1, -0.99, -0.5, 0, 0.5, 0.99, 1]),
+            },
+            lambda value, mu, sigma, xi: sp.genextreme.logcdf(value, c=-xi, loc=mu, scale=sigma)
+            if 1 + xi * (value - mu) / sigma > 0
+            else -np.inf,
+            decimal=select_by_precision(float64=6, float32=2),
+        )
+
+    @pytest.mark.parametrize(
+        "mu, sigma, xi, size, expected",
+        [
+            (0, 1, 0, None, 0),
+            (1, np.arange(1, 4), 0.1, None, 1 + np.arange(1, 4) * (1.1**-0.1 - 1) / 0.1),
+            (np.arange(5), 1, 0.1, None, np.arange(5) + (1.1**-0.1 - 1) / 0.1),
+            (
+                0,
+                1,
+                np.linspace(-0.2, 0.2, 6),
+                None,
+                ((1 + np.linspace(-0.2, 0.2, 6)) ** -np.linspace(-0.2, 0.2, 6) - 1)
+                / np.linspace(-0.2, 0.2, 6),
+            ),
+            (1, 2, 0.1, 5, np.full(5, 1 + 2 * (1.1**-0.1 - 1) / 0.1)),
+            (
+                np.arange(6),
+                np.arange(1, 7),
+                np.linspace(-0.2, 0.2, 6),
+                (3, 6),
+                np.full(
+                    (3, 6),
+                    np.arange(6)
+                    + np.arange(1, 7)
+                    * ((1 + np.linspace(-0.2, 0.2, 6)) ** -np.linspace(-0.2, 0.2, 6) - 1)
+                    / np.linspace(-0.2, 0.2, 6),
+                ),
+            ),
+        ],
+    )
+    def test_genextreme_moment(self, mu, sigma, xi, size, expected):
+        with pm.Model() as model:
+            GenExtreme("x", mu=mu, sigma=sigma, xi=xi, size=size)
+        assert_moment_is_expected(model, expected)
+
+    def test_gen_extreme_scipy_kwarg(self):
+        dist = GenExtreme.dist(xi=1, scipy=False)
+        assert dist.owner.inputs[-1].eval() == 1
+
+        dist = GenExtreme.dist(xi=1, scipy=True)
+        assert dist.owner.inputs[-1].eval() == -1
+
+
+class TestGenExtreme(BaseTestDistributionRandom):
+    pymc_dist = GenExtreme
+    pymc_dist_params = {"mu": 0, "sigma": 1, "xi": -0.1}
+    expected_rv_op_params = {"mu": 0, "sigma": 1, "xi": -0.1}
+    # Notice, using different parametrization of xi sign to scipy
+    reference_dist_params = {"loc": 0, "scale": 1, "c": 0.1}
+    reference_dist = seeded_scipy_distribution_builder("genextreme")
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
diff --git a/pymc_experimental/tests/test_histogram_approximation.py b/pymc_experimental/tests/test_histogram_approximation.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from pymc_experimental.distributions import histogram_utils`
	`2`	`+from pymc_experimental.distributions.histogram_utils import histogram_approximation`