Skip to content

Commit 2e8505a

Browse files
Add GeneralizedPoisson distribution
Co-authored-by: Luciano Paz <luciano.paz.neuro@gmail.com>
1 parent ed9f54c commit 2e8505a

File tree

4 files changed

+300
-1
lines changed

4 files changed

+300
-1
lines changed

docs/api_reference.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Distributions
2929
:toctree: generated/
3030

3131
GenExtreme
32+
GeneralizedPoisson
3233
histogram_utils.histogram_approximation
3334
DiscreteMarkovChain
3435

pymc_experimental/distributions/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
"""
1919

2020
from pymc_experimental.distributions.continuous import GenExtreme
21+
from pymc_experimental.distributions.discrete import GeneralizedPoisson
2122
from pymc_experimental.distributions.timeseries import DiscreteMarkovChain
2223

23-
__all__ = ["GenExtreme", "DiscreteMarkovChain"]
24+
__all__ = [
25+
"DiscreteMarkovChain",
26+
"GeneralizedPoisson",
27+
"GenExtreme",
28+
]
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
# Copyright 2023 The PyMC Developers
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import numpy as np
16+
import pymc as pm
17+
from pymc.distributions.dist_math import check_parameters, factln, logpow
18+
from pymc.distributions.shape_utils import rv_size_is_none
19+
from pytensor import tensor as pt
20+
from pytensor.tensor.random.op import RandomVariable
21+
22+
23+
class GeneralizedPoissonRV(RandomVariable):
24+
name = "generalized_poisson"
25+
ndim_supp = 0
26+
ndims_params = [0, 0]
27+
dtype = "int64"
28+
_print_name = ("GeneralizedPoisson", "\\operatorname{GeneralizedPoisson}")
29+
30+
@classmethod
31+
def rng_fn(cls, rng, theta, lam, size):
32+
theta = np.asarray(theta)
33+
lam = np.asarray(lam)
34+
35+
if size is not None:
36+
dist_size = size
37+
else:
38+
dist_size = np.broadcast_shapes(theta.shape, lam.shape)
39+
40+
# A mix of 2 algorithms described by Famoye (1997) is used depending on parameter values
41+
# 0: Inverse method, computed on the log scale. Used when lam <= 0.
42+
# 1: Branching method. Used when lambda > 0.
43+
x = np.empty(dist_size)
44+
idxs_mask = np.broadcast_to(lam < 0, dist_size)
45+
if np.any(idxs_mask):
46+
x[idxs_mask] = cls._inverse_rng_fn(rng, theta, lam, dist_size, idxs_mask=idxs_mask)[
47+
idxs_mask
48+
]
49+
idxs_mask = ~idxs_mask
50+
if np.any(idxs_mask):
51+
x[idxs_mask] = cls._branching_rng_fn(rng, theta, lam, dist_size, idxs_mask=idxs_mask)[
52+
idxs_mask
53+
]
54+
return x
55+
56+
@classmethod
57+
def _inverse_rng_fn(cls, rng, theta, lam, dist_size, idxs_mask):
58+
# We handle x/0 and log(0) issues with branching
59+
with np.errstate(divide="ignore", invalid="ignore"):
60+
log_u = np.log(rng.uniform(size=dist_size))
61+
pos_lam = lam > 0
62+
abs_log_lam = np.log(np.abs(lam))
63+
theta_m_lam = theta - lam
64+
log_s = -theta
65+
log_p = log_s.copy()
66+
x_ = 0
67+
x = np.zeros(dist_size)
68+
below_cutpoint = log_s < log_u
69+
while np.any(below_cutpoint[idxs_mask]):
70+
x_ += 1
71+
x[below_cutpoint] += 1
72+
log_c = np.log(theta_m_lam + lam * x_)
73+
# Compute log(1 + lam / C)
74+
log1p_lam_m_C = np.where(
75+
pos_lam,
76+
np.log1p(np.exp(abs_log_lam - log_c)),
77+
pm.math.log1mexp_numpy(abs_log_lam - log_c, negative_input=True),
78+
)
79+
log_p = log_c + log1p_lam_m_C * (x_ - 1) + log_p - np.log(x_) - lam
80+
log_s = np.logaddexp(log_s, log_p)
81+
below_cutpoint = log_s < log_u
82+
return x
83+
84+
@classmethod
85+
def _branching_rng_fn(cls, rng, theta, lam, dist_size, idxs_mask):
86+
lam_ = np.abs(lam) # This algorithm is only valid for positive lam
87+
y = rng.poisson(theta, size=dist_size)
88+
x = y.copy()
89+
higher_than_zero = y > 0
90+
while np.any(higher_than_zero[idxs_mask]):
91+
y = rng.poisson(lam_ * y)
92+
x[higher_than_zero] = x[higher_than_zero] + y[higher_than_zero]
93+
higher_than_zero = y > 0
94+
return x
95+
96+
97+
generalized_poisson = GeneralizedPoissonRV()
98+
99+
100+
class GeneralizedPoisson(pm.distributions.Discrete):
101+
R"""
102+
Generalized Poisson.
103+
Used to model count data that can be either overdispersed or underdispersed.
104+
Offers greater flexibility than the standard Poisson which assumes equidispersion,
105+
where the mean is equal to the variance.
106+
The pmf of this distribution is
107+
108+
.. math:: f(x \mid \mu, \lambda) =
109+
\frac{\mu (\mu + \lambda x)^{x-1} e^{-\mu - \lambda x}}{x!}
110+
======== ======================================
111+
Support :math:`x \in \mathbb{N}_0`
112+
Mean :math:`\frac{\mu}{1 - \lambda}`
113+
Variance :math:`\frac{\mu}{(1 - \lambda)^3}`
114+
======== ======================================
115+
116+
Parameters
117+
----------
118+
mu : tensor_like of float
119+
Mean parameter (mu > 0).
120+
lam : tensor_like of float
121+
Dispersion parameter (max(-1, -mu/4) <= lam <= 1).
122+
123+
Notes
124+
-----
125+
When lam = 0, the Generalized Poisson reduces to the standard Poisson with the same mu.
126+
When lam < 0, the mean is greater than the variance (underdispersion).
127+
When lam > 0, the mean is less than the variance (overdispersion).
128+
129+
References
130+
----------
131+
The PMF is taken from [1] and the random generator function is adapted from [2].
132+
.. [1] Consul, PoC, and Felix Famoye. "Generalized Poisson regression model."
133+
Communications in Statistics-Theory and Methods 21.1 (1992): 89-109.
134+
.. [2] Famoye, Felix. "Generalized Poisson random variate generation." American
135+
Journal of Mathematical and Management Sciences 17.3-4 (1997): 219-237.
136+
"""
137+
138+
rv_op = generalized_poisson
139+
140+
@classmethod
141+
def dist(cls, mu, lam, **kwargs):
142+
mu = pt.as_tensor_variable(mu)
143+
lam = pt.as_tensor_variable(lam)
144+
return super().dist([mu, lam], **kwargs)
145+
146+
def moment(rv, size, mu, lam):
147+
mean = pt.floor(mu / (1 - lam))
148+
if not rv_size_is_none(size):
149+
mean = pt.full(size, mean)
150+
return mean
151+
152+
def logp(value, mu, lam):
153+
mu_lam_value = mu + lam * value
154+
logprob = np.log(mu) + logpow(mu_lam_value, value - 1) - mu_lam_value - factln(value)
155+
156+
# Probability is 0 when value > m, where m is the largest positive integer for
157+
# which mu + m * lam > 0 (when lam < 0).
158+
logprob = pt.switch(
159+
pt.or_(
160+
mu_lam_value < 0,
161+
value < 0,
162+
),
163+
-np.inf,
164+
logprob,
165+
)
166+
167+
return check_parameters(
168+
logprob,
169+
0 < mu,
170+
pt.abs(lam) <= 1,
171+
(-mu / 4) <= lam,
172+
msg="0 < mu, max(-1, -mu/4)) <= lam <= 1",
173+
)
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# Copyright 2023 The PyMC Developers
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import numpy as np
15+
import pymc as pm
16+
import pytensor
17+
import pytensor.tensor as pt
18+
import pytest
19+
import scipy.stats
20+
from pymc.logprob.utils import ParameterValueError
21+
from pymc.testing import (
22+
BaseTestDistributionRandom,
23+
Domain,
24+
Rplus,
25+
assert_moment_is_expected,
26+
discrete_random_tester,
27+
)
28+
from pytensor import config
29+
30+
from pymc_experimental.distributions import GeneralizedPoisson
31+
32+
33+
class TestGeneralizedPoisson:
34+
class TestRandomVariable(BaseTestDistributionRandom):
35+
pymc_dist = GeneralizedPoisson
36+
pymc_dist_params = {"mu": 4.0, "lam": 1.0}
37+
expected_rv_op_params = {"mu": 4.0, "lam": 1.0}
38+
tests_to_run = [
39+
"check_pymc_params_match_rv_op",
40+
"check_rv_size",
41+
]
42+
43+
def test_random_matches_poisson(self):
44+
discrete_random_tester(
45+
dist=self.pymc_dist,
46+
paramdomains={"mu": Rplus, "lam": Domain([0], edges=(None, None))},
47+
ref_rand=lambda mu, lam, size: scipy.stats.poisson.rvs(mu, size=size),
48+
)
49+
50+
@pytest.mark.parametrize("mu", (2.5, 20, 50))
51+
def test_random_lam_expected_moments(self, mu):
52+
lam = np.array([-0.9, -0.7, -0.2, 0, 0.2, 0.7, 0.9])
53+
dist = self.pymc_dist.dist(mu=mu, lam=lam, size=(10_000, len(lam)))
54+
draws = dist.eval()
55+
56+
expected_mean = mu / (1 - lam)
57+
np.testing.assert_allclose(draws.mean(0), expected_mean, rtol=1e-1)
58+
59+
expected_std = np.sqrt(mu / (1 - lam) ** 3)
60+
np.testing.assert_allclose(draws.std(0), expected_std, rtol=1e-1)
61+
62+
def test_logp_matches_poisson(self):
63+
# We are only checking this distribution for lambda=0 where it's equivalent to Poisson.
64+
mu = pt.scalar("mu")
65+
lam = pt.scalar("lam")
66+
value = pt.vector("value", dtype="int64")
67+
68+
logp = pm.logp(GeneralizedPoisson.dist(mu, lam), value)
69+
logp_fn = pytensor.function([value, mu, lam], logp)
70+
71+
test_value = np.array([0, 1, 2, 30])
72+
for test_mu in (0.01, 0.1, 0.9, 1, 1.5, 20, 100):
73+
np.testing.assert_allclose(
74+
logp_fn(test_value, test_mu, lam=0),
75+
scipy.stats.poisson.logpmf(test_value, test_mu),
76+
rtol=1e-7 if config.floatX == "float64" else 1e-5,
77+
)
78+
79+
# Check out-of-bounds values
80+
value = pt.scalar("value")
81+
logp = pm.logp(GeneralizedPoisson.dist(mu, lam), value)
82+
logp_fn = pytensor.function([value, mu, lam], logp)
83+
84+
logp_fn(-1, mu=5, lam=0) == -np.inf
85+
logp_fn(9, mu=5, lam=-1) == -np.inf
86+
87+
# Check mu/lam restrictions
88+
with pytest.raises(ParameterValueError):
89+
logp_fn(1, mu=1, lam=2)
90+
91+
with pytest.raises(ParameterValueError):
92+
logp_fn(1, mu=0, lam=0)
93+
94+
with pytest.raises(ParameterValueError):
95+
logp_fn(1, mu=1, lam=-1)
96+
97+
def test_logp_lam_expected_moments(self):
98+
mu = 30
99+
lam = np.array([-0.9, -0.7, -0.2, 0, 0.2, 0.7, 0.9])
100+
with pm.Model():
101+
x = GeneralizedPoisson("x", mu=mu, lam=lam)
102+
trace = pm.sample(chains=1, draws=10_000, random_seed=96).posterior
103+
104+
expected_mean = mu / (1 - lam)
105+
np.testing.assert_allclose(trace["x"].mean(("chain", "draw")), expected_mean, rtol=1e-1)
106+
107+
expected_std = np.sqrt(mu / (1 - lam) ** 3)
108+
np.testing.assert_allclose(trace["x"].std(("chain", "draw")), expected_std, rtol=1e-1)
109+
110+
@pytest.mark.parametrize(
111+
"mu, lam, size, expected",
112+
[
113+
(50, [-0.6, 0, 0.6], None, np.floor(50 / (1 - np.array([-0.6, 0, 0.6])))),
114+
([5, 50], -0.1, (4, 2), np.full((4, 2), np.floor(np.array([5, 50]) / 1.1))),
115+
],
116+
)
117+
def test_moment(self, mu, lam, size, expected):
118+
with pm.Model() as model:
119+
GeneralizedPoisson("x", mu=mu, lam=lam, size=size)
120+
assert_moment_is_expected(model, expected)

0 commit comments

Comments
 (0)