Genextreme #84
@@ -1,2 +1,26 @@
from pymc_experimental.distributions import histogram_utils
from pymc_experimental.distributions.histogram_utils import histogram_approximation
# Copyright 2020 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# coding: utf-8
"""
Experimental probability distributions for stochastic nodes in PyMC.
"""

from pymc_experimental.distributions.continuous import (
    GenExtreme,
)

__all__ = [
    "GenExtreme",
]
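Aside (not part of the diff): a minimal sketch of how the newly exported GenExtreme might be used, following the pmx.GenExtreme access pattern the tests in this PR use; the data, priors, and sampler settings below are invented for illustration.

    import numpy as np
    import pymc as pm
    import pymc_experimental as pmx

    maxima = np.array([4.2, 5.1, 3.8, 6.0, 4.9, 5.4])  # made-up block maxima

    with pm.Model():
        mu = pm.Normal("mu", 0.0, 10.0)
        sigma = pm.HalfNormal("sigma", 5.0)
        xi = pm.Normal("xi", 0.0, 0.3)  # weakly informative prior keeps xi near the practical range
        pmx.GenExtreme("obs", mu=mu, sigma=sigma, xi=xi, observed=maxima)
        idata = pm.sample(1000, tune=1000)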
@@ -0,0 +1,218 @@
# Copyright 2020 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# coding: utf-8
"""
Experimental probability distributions for stochastic nodes in PyMC.

The imports from pymc are not fully replicated here: add imports as necessary.
"""

from typing import List, Tuple
import aesara
import aesara.tensor as at
import numpy as np
from scipy import stats

from aesara.tensor.random.op import RandomVariable
from pymc.distributions.distribution import Continuous
from aesara.tensor.var import TensorVariable
from pymc.aesaraf import floatX
from pymc.distributions.dist_math import check_parameters


class GenExtremeRV(RandomVariable):
    name: str = "Generalized Extreme Value"
    ndim_supp: int = 0
    ndims_params: List[int] = [0, 0, 0]
    dtype: str = "floatX"
    _print_name: Tuple[str, str] = ("Generalized Extreme Value", "\\operatorname{GEV}")

    def __call__(
        self, mu=0.0, sigma=1.0, xi=0.0, size=None, **kwargs
    ) -> TensorVariable:
        return super().__call__(mu, sigma, xi, size=size, **kwargs)

    @classmethod
    def rng_fn(
        cls,
        rng: np.random.RandomState,
        mu: np.ndarray,
        sigma: np.ndarray,
        xi: np.ndarray,
        size: Tuple[int, ...],
    ) -> np.ndarray:
        # Notice the negative sign here, since the remainder of GenExtreme is based on the Coles parametrization
        return stats.genextreme.rvs(
            c=-xi, loc=mu, scale=sigma, random_state=rng, size=size
        )


gev = GenExtremeRV()


class GenExtreme(Continuous):
    r"""
    Univariate Generalized Extreme Value log-likelihood

    The cdf of this distribution is

    .. math::

       G(x \mid \mu, \sigma, \xi) = \exp\left[ -\left(1 + \xi z\right)^{-\frac{1}{\xi}} \right]

    where

    .. math::

       z = \frac{x - \mu}{\sigma}

    and is defined on the set:

    .. math::

       \left\{x: 1 + \xi\left(\frac{x-\mu}{\sigma}\right) > 0 \right\}.

    Note that this parametrization is per Coles (2001), and differs from that of
    Scipy in the sign of the shape parameter, :math:`\xi`.

    .. plot::

        import matplotlib.pyplot as plt
        import numpy as np
        import scipy.stats as st
        import arviz as az
        plt.style.use('arviz-darkgrid')
        x = np.linspace(-10, 20, 200)
        mus = [0., 4., -1.]
        sigmas = [2., 2., 4.]
        xis = [-0.3, 0.0, 0.3]
        for mu, sigma, xi in zip(mus, sigmas, xis):
            pdf = st.genextreme.pdf(x, c=-xi, loc=mu, scale=sigma)
            plt.plot(x, pdf, label=rf'$\mu$ = {mu}, $\sigma$ = {sigma}, $\xi$={xi}')
        plt.xlabel('x', fontsize=12)
        plt.ylabel('f(x)', fontsize=12)
        plt.legend(loc=1)
        plt.show()

    ========  ==========================================================================
    Support   * :math:`x \in [\mu - \sigma/\xi, +\infty]`, when :math:`\xi > 0`
              * :math:`x \in \mathbb{R}` when :math:`\xi = 0`
              * :math:`x \in [-\infty, \mu - \sigma/\xi]`, when :math:`\xi < 0`
    Mean      * :math:`\mu + \sigma(g_1 - 1)/\xi`, when :math:`\xi \neq 0, \xi < 1`
              * :math:`\mu + \sigma \gamma`, when :math:`\xi = 0`
              * :math:`\infty`, when :math:`\xi \geq 1`
              where :math:`\gamma` is the Euler-Mascheroni constant, and
              :math:`g_k = \Gamma (1-k\xi)`
    Variance  * :math:`\sigma^2 (g_2 - g_1^2)/\xi^2`, when :math:`\xi \neq 0, \xi < 0.5`
              * :math:`\frac{\pi^2}{6} \sigma^2`, when :math:`\xi = 0`
              * :math:`\infty`, when :math:`\xi \geq 0.5`
    ========  ==========================================================================

    Parameters
    ----------
    mu: float
        Location parameter.
    sigma: float
        Scale parameter (sigma > 0).
    xi: float
        Shape parameter.
    scipy: bool
        Whether or not to use the Scipy interpretation of the shape parameter
        (defaults to `False`).

    References
    ----------
    .. [Coles2001] Coles, S.G. (2001).
        An Introduction to the Statistical Modeling of Extreme Values.
        Springer-Verlag, London.
    """

    rv_op = gev

    @classmethod
    def dist(cls, mu=0, sigma=1, xi=0, scipy=False, **kwargs):
        # If xi is given in the SciPy convention, convert it to the standard (Coles) sign
        if scipy:
            xi = -xi
        mu = at.as_tensor_variable(floatX(mu))
        sigma = at.as_tensor_variable(floatX(sigma))
        xi = at.as_tensor_variable(floatX(xi))

        return super().dist([mu, sigma, xi], **kwargs)

    def logp(value, mu, sigma, xi):
        """
        Calculate log-probability of Generalized Extreme Value distribution
        at specified value.

        Parameters
        ----------
        value: numeric
            Value(s) for which log-probability is calculated. If the log probabilities for
            multiple values are desired, the values must be provided in a numpy array or
            Aesara tensor.

        Returns
        -------
        TensorVariable
        """
        scaled = (value - mu) / sigma

        logp_expression = at.switch(
            at.isclose(xi, 0),
            -at.log(sigma) - scaled - at.exp(-scaled),
            -at.log(sigma)
            - ((xi + 1) / xi) * at.log1p(xi * scaled)
            - at.pow(1 + xi * scaled, -1 / xi),
        )
        # bnd = mu - sigma/xi
        return check_parameters(
            logp_expression,
            1 + xi * (value - mu) / sigma > 0,
            # at.switch(xi > 0, value > bnd, value < bnd),
            sigma > 0,
        )

Review discussion attached to the support constraint (1 + xi * (value - mu) / sigma > 0) above:

ricardoV94: This constraint depends on mu, so it should probably be in a ... (the suggested change was truncated in this capture).

ccaprani: I've implemented this now, but I'm not precisely sure of the purpose.

ricardoV94: The point is that the logp is usually defined for every possible value, whereas invalid parameters raise:

    import pymc as pm
    print(pm.logp(pm.HalfNormal.dist(), -1.0).eval())  # -np.inf
    pm.logp(pm.HalfNormal.dist(sigma=-5), 1).eval()  # ValueError: sigma must be positive

Scipy is similar, but yields nan for invalid parameters:

    import scipy.stats as st
    print(st.halfnorm().logpdf(-1.0))  # -inf
    print(st.halfnorm(scale=-5).logpdf(1.0))  # nan

ccaprani: Thanks @ricardoV94, I can see this alright. Nevertheless, the tests are failing only in the pymc test environment. Here are two cases. (1) Allowing xi to be anywhere in the R domain (mathematically true, but not in practice): the ... (you can see the check from numpy here; it's worth noting that the numpy docs don't recommend using ...). (2) Restricting the domain of xi to the practical range [-1, +1]: in this case running ... fails where it is checking for an invalid value of xi (-2), but note that it passes when called directly ... I did have a look at adding a ... It would be great to have your input on this, since everything seems to work fine until it enters the pymc test environment. Hopefully it is just a tweak to one of the conditions in the logp/logcdf functions?

ricardoV94: You can increase the test tolerance in ...

ccaprani: @ricardoV94, good news: using ... works, and I can expand to the full domain of ... In any case, thank you so much for your help on this. I think it's been 11 months in the pipeline, so hopefully we can get it merged?!

ricardoV94: I have thought about this some time ago, but then faced issues. logps vary widely (and non-linearly) between 1e-700s and 1e+x. More annoying, it's not uncommon to have logps around 0, in which case relative comparisons, or on the other hand looser absolute comparisons, become pretty meaningless. If you have a more flexible solution that works in general, that would be great. Certainly there are more principled ways of doing these accuracy checks.

ccaprani: @ricardoV94

    >>> x = np.pi*np.array([1e2,1e-1,1e-6,1e-10,1e-100,1e-700])
    >>> y = (1 + 1e-6)*x
    >>> npt.assert_allclose(x,y)  # will assert
    AssertionError:
    Not equal to tolerance rtol=1e-07, atol=0
    Mismatched elements: 5 / 6 (83.3%)
    Max absolute difference: 0.00031416
    Max relative difference: 9.99999e-07
     x: array([3.141593e+002, 3.141593e-001, 3.141593e-006, 3.141593e-010,
               3.141593e-100, 0.000000e+000])
     y: array([3.141596e+002, 3.141596e-001, 3.141596e-006, 3.141596e-010,
               3.141596e-100, 0.000000e+000])

but this does not assert:

    >>> npt.assert_allclose(x, y, rtol=1e-6, atol=1e-10)

ricardoV94: Do you want to try to open a PR on the main repo to change the logic then?

ccaprani: Done: pymc-devs/pymc#6159
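To make the distinction in that thread concrete, here is a hypothetical sketch (not the code in this PR, and not necessarily what the truncated suggestion proposed): out-of-support values map to -inf via a switch, while check_parameters is reserved for genuine parameter constraints such as sigma > 0.

    import numpy as np
    import aesara.tensor as at
    from pymc.distributions.dist_math import check_parameters

    def gev_logp_sketch(value, mu, sigma, xi):
        # same core expression as the logp above
        scaled = (value - mu) / sigma
        core = at.switch(
            at.isclose(xi, 0),
            -at.log(sigma) - scaled - at.exp(-scaled),
            -at.log(sigma)
            - ((xi + 1) / xi) * at.log1p(xi * scaled)
            - at.pow(1 + xi * scaled, -1 / xi),
        )
        # values outside the support give -inf instead of triggering a parameter error
        logp = at.switch(1 + xi * scaled > 0, core, -np.inf)
        # only true parameter constraints go through check_parameters
        return check_parameters(logp, sigma > 0, msg="sigma > 0")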

    def logcdf(value, mu, sigma, xi):
        """
        Compute the log of the cumulative distribution function for Generalized Extreme Value
        distribution at the specified value.

        Parameters
        ----------
        value: numeric or np.ndarray or `TensorVariable`
            Value(s) for which log CDF is calculated. If the log CDF for
            multiple values is desired, the values must be provided in a numpy
            array or `TensorVariable`.

        Returns
        -------
        TensorVariable
        """
        scaled = (value - mu) / sigma
        logc_expression = at.switch(
            at.isclose(xi, 0), -at.exp(-scaled), -at.pow(1 + xi * scaled, -1 / xi)
        )
        return check_parameters(logc_expression, 1 + xi * scaled > 0, sigma > 0)

    def get_moment(value_var, size, mu, sigma, xi):
        r"""
        Using the mode, as the mean can be infinite when :math:`\xi \geq 1`.
        """
        mode = at.switch(
            at.isclose(xi, 0), mu, mu + sigma * (at.pow(1 + xi, -xi) - 1) / xi
        )
        return at.full(size, mode, dtype=aesara.config.floatX)
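Aside: a quick numerical cross-check of the docstring's parametrization note and of the mode formula used by get_moment, written directly against scipy with arbitrary parameter values (not part of the diff).

    import numpy as np
    from scipy import stats

    mu, sigma, xi = 1.0, 2.0, 0.3
    dist = stats.genextreme(c=-xi, loc=mu, scale=sigma)  # Coles xi maps to scipy c = -xi

    print(dist.support())  # (-5.666..., inf), i.e. bounded below at mu - sigma/xi for xi > 0

    mode = mu + sigma * ((1 + xi) ** (-xi) - 1) / xi  # mode formula used by get_moment
    x = np.linspace(mode - 0.5, mode + 0.5, 2001)
    print(np.isclose(x[np.argmax(dist.pdf(x))], mode, atol=1e-3))  # True: the pdf peaks at the mode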
@@ -0,0 +1,50 @@
# Copyright 2020 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# general imports
import scipy.stats.distributions as ssd

# test support imports from pymc
from pymc.tests.test_distributions import (
    R,
    Rplus,
    Domain,
    TestMatchesScipy,
)

# the distributions to be tested
from pymc_experimental.distributions import (
    GenExtreme,
)


class TestMatchesScipyX(TestMatchesScipy):
""" | ||
Wrapper class so that tests of experimental additions can be dropped into | ||
PyMC directly on adoption. | ||
""" | ||
|
||
def test_genextreme(self): | ||
self.check_logp( | ||
GenExtreme, | ||
R, | ||
{"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, | ||
lambda value, mu, sigma, xi: ssd.genextreme.logpdf(value, c=-xi, loc=mu, scale=sigma), | ||
) | ||
self.check_logcdf( | ||
GenExtreme, | ||
R, | ||
{"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, | ||
lambda value, mu, sigma, xi: ssd.genextreme.logcdf(value, c=-xi, loc=mu, scale=sigma), | ||
) |
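For intuition, a stand-alone spot check of the comparison check_logp performs, using the dist API from this PR and the same scipy reference, at a single arbitrary point rather than the full Domain sweep.

    import numpy as np
    import pymc as pm
    import scipy.stats.distributions as ssd
    from pymc_experimental.distributions import GenExtreme

    value, mu, sigma, xi = 0.5, 0.0, 1.0, 0.3
    pymc_logp = pm.logp(GenExtreme.dist(mu=mu, sigma=sigma, xi=xi), value).eval()
    scipy_logp = ssd.genextreme.logpdf(value, c=-xi, loc=mu, scale=sigma)
    print(np.isclose(pymc_logp, scipy_logp))  # True, up to floating-point tolerance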
@@ -0,0 +1,51 @@
# import aesara
import numpy as np
import pytest
import scipy.stats as st

# from aesara import tensor as at
# from scipy import special

from pymc_experimental.distributions import (
    GenExtreme,
)

from pymc.model import Model

from pymc.tests.test_distributions_moments import assert_moment_is_expected


@pytest.mark.parametrize(
    "mu, sigma, xi, size, expected",
    [
        (0, 1, 0, None, 0),
        (1, np.arange(1, 4), 0.1, None, 1 + np.arange(1, 4) * (1.1 ** -0.1 - 1) / 0.1),
        (np.arange(5), 1, 0.1, None, np.arange(5) + (1.1 ** -0.1 - 1) / 0.1),
        (
            0,
            1,
            np.linspace(-0.2, 0.2, 6),
            None,
            ((1 + np.linspace(-0.2, 0.2, 6)) ** -np.linspace(-0.2, 0.2, 6) - 1)
            / np.linspace(-0.2, 0.2, 6),
        ),
        (1, 2, 0.1, 5, np.full(5, 1 + 2 * (1.1 ** -0.1 - 1) / 0.1)),
        (
            np.arange(6),
            np.arange(1, 7),
            np.linspace(-0.2, 0.2, 6),
            (3, 6),
            np.full(
                (3, 6),
                np.arange(6)
                + np.arange(1, 7)
                * ((1 + np.linspace(-0.2, 0.2, 6)) ** -np.linspace(-0.2, 0.2, 6) - 1)
                / np.linspace(-0.2, 0.2, 6),
            ),
        ),
    ],
)
def test_genextreme_moment(mu, sigma, xi, size, expected):
    with Model() as model:
        GenExtreme("x", mu=mu, sigma=sigma, xi=xi, size=size)
    assert_moment_is_expected(model, expected)
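As a quick arithmetic check of one expected value above (the mu=1, sigma=2, xi=0.1, size=5 case), evaluated outside the test harness:

    import numpy as np

    mu, sigma, xi = 1.0, 2.0, 0.1
    mode = mu + sigma * ((1 + xi) ** (-xi) - 1) / xi  # mode formula used by get_moment
    print(round(mode, 6))  # 0.810285
    print(np.allclose(np.full(5, mode), np.full(5, 1 + 2 * (1.1 ** -0.1 - 1) / 0.1)))  # True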
@@ -0,0 +1,38 @@
# Copyright 2020 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# general imports

# test support imports from pymc
from pymc.tests.test_distributions_random import (
    BaseTestDistribution,
    seeded_scipy_distribution_builder,
)

# the distributions to be tested
import pymc_experimental as pmx


class TestGenExtreme(BaseTestDistribution):
    pymc_dist = pmx.GenExtreme
    pymc_dist_params = {"mu": 0, "sigma": 1, "xi": -0.1}
    expected_rv_op_params = {"mu": 0, "sigma": 1, "xi": -0.1}
    # Note: xi uses the opposite sign convention to scipy's shape parameter c
    reference_dist_params = {"loc": 0, "scale": 1, "c": 0.1}
    reference_dist = seeded_scipy_distribution_builder("genextreme")
    tests_to_run = [
        "check_pymc_params_match_rv_op",
        "check_pymc_draws_match_reference",
        "check_rv_size",
    ]
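For intuition on what check_pymc_draws_match_reference verifies here, a stand-alone sketch that mirrors the rng_fn defined earlier in this PR; the helper name, seed, and sample size are arbitrary.

    import numpy as np
    from scipy import stats

    def rng_fn_sketch(rng, mu, sigma, xi, size):
        # mirrors GenExtremeRV.rng_fn above: negate xi when calling scipy
        return stats.genextreme.rvs(c=-xi, loc=mu, scale=sigma, random_state=rng, size=size)

    seed, n = 20220914, 500
    coles_draws = rng_fn_sketch(np.random.default_rng(seed), mu=0, sigma=1, xi=-0.1, size=n)
    scipy_draws = stats.genextreme.rvs(c=0.1, loc=0, scale=1, size=n,
                                       random_state=np.random.default_rng(seed))
    print(np.allclose(coles_draws, scipy_draws))  # True: same stream, same effective parameters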