Commit 41d5283

ally-lee and ricardoV94 committed
Implement generalized poisson distribution
Co-authored-by: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com>
1 parent 53e0e3a commit 41d5283

File tree

6 files changed: +249 / -1 lines changed


docs/source/api/distributions/discrete.rst

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ Discrete
    Bernoulli
    DiscreteWeibull
    Poisson
+   GeneralizedPoisson
    NegativeBinomial
    Constant
    ZeroInflatedPoisson

pymc/distributions/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -64,6 +64,7 @@
     Constant,
     DiscreteUniform,
     DiscreteWeibull,
+    GeneralizedPoisson,
     Geometric,
     HyperGeometric,
     NegativeBinomial,
@@ -139,6 +140,7 @@
     "BetaBinomial",
     "Bernoulli",
     "Poisson",
+    "GeneralizedPoisson",
     "NegativeBinomial",
     "Constant",
     "ZeroInflatedPoisson",

pymc/distributions/discrete.py

Lines changed: 154 additions & 1 deletion
@@ -46,7 +46,7 @@
 from pymc.distributions.logprob import logp
 from pymc.distributions.mixture import Mixture
 from pymc.distributions.shape_utils import rv_size_is_none
-from pymc.math import sigmoid
+from pymc.math import log1mexp_numpy, sigmoid
 from pymc.vartypes import continuous_types

 __all__ = [
@@ -55,6 +55,7 @@
     "Bernoulli",
     "DiscreteWeibull",
     "Poisson",
+    "GeneralizedPoisson",
     "NegativeBinomial",
     "Constant",
     "ZeroInflatedPoisson",
@@ -664,6 +665,158 @@ def logcdf(value, mu):
         return check_parameters(res, 0 <= mu, msg="mu >= 0")


+class GeneralizedPoissonRV(RandomVariable):
+    name = "generalized_poisson"
+    ndim_supp = 0
+    ndims_params = [0, 0]
+    dtype = "int64"
+    _print_name = ("GeneralizedPoisson", "\\operatorname{GeneralizedPoisson}")
+
+    @classmethod
+    def rng_fn(cls, rng, theta, lam, size):
+        theta = np.asarray(theta)
+        lam = np.asarray(lam)
+
+        if size is not None:
+            dist_size = size
+        else:
+            dist_size = np.broadcast_shapes(theta.shape, lam.shape)
+
+        # Inversion algorithm described by Famoye (1997), computed on the log scale
+        # for numerical stability.
+        # TODO: The authors recommend mixing this method with the "branching"
+        # algorithm, but that one is only valid for positive lambdas. They also
+        # mention the normal approximation for lambda < 0 and mu >= 10, or
+        # 0 < lambda < 0.2 and mu >= 30. This could be done by splitting the
+        # values depending on lambda and then recombining them.
+
+        log_u = np.log(rng.uniform(size=dist_size))
+
+        pos_lam = lam > 0
+        with np.errstate(divide="ignore", invalid="ignore"):
+            mixed_log_lam = np.where(pos_lam, np.log(lam), np.log(-lam))
+            theta_m_lam = theta - lam
+
+        log_s = -theta
+        log_p = log_s.copy()
+        x_ = 0
+        x = np.zeros(dist_size)
+        below_cutpoint = log_s < log_u
+        with np.errstate(divide="ignore", invalid="ignore"):
+            while np.any(below_cutpoint):
+                x_ += 1
+                x[below_cutpoint] += 1
+                log_c = np.log(theta_m_lam + lam * x_)
+                # Compute log(1 + lam / C)
+                log1p_lam_m_C = np.where(
+                    pos_lam,
+                    np.log1p(np.exp(mixed_log_lam - log_c)),
+                    log1mexp_numpy(mixed_log_lam - log_c, negative_input=True),
+                )
+                log_p = log_c + log1p_lam_m_C * (x_ - 1) + log_p - np.log(x_) - lam
+                log_s = np.logaddexp(log_s, log_p)
+                below_cutpoint = log_s < log_u
+        return x
+
+
+generalized_poisson = GeneralizedPoissonRV()
+
+
+class GeneralizedPoisson(Discrete):
+    R"""
+    Generalized Poisson log-likelihood.
+
+    Used to model count data that can be either overdispersed or underdispersed.
+    It offers greater flexibility than the standard Poisson, which assumes
+    equidispersion, i.e. that the mean equals the variance.
+
+    The pmf of this distribution is
+
+    .. math:: f(x \mid \mu, \lambda) =
+        \frac{\mu (\mu + \lambda x)^{x-1} e^{-\mu - \lambda x}}{x!}
+
+    ========  ======================================
+    Support   :math:`x \in \mathbb{N}_0`
+    Mean      :math:`\frac{\mu}{1 - \lambda}`
+    Variance  :math:`\frac{\mu}{(1 - \lambda)^3}`
+    ========  ======================================
+
+    Parameters
+    ----------
+    mu : tensor_like of float
+        Mean parameter (mu > 0).
+    lam : tensor_like of float
+        Dispersion parameter (max(-1, -mu/4) <= lam <= 1).
+
+    Notes
+    -----
+    When lam = 0, the Generalized Poisson reduces to the standard Poisson with the same mu.
+    When lam < 0, the mean is greater than the variance (underdispersion).
+    When lam > 0, the mean is less than the variance (overdispersion).
+
+    References
+    ----------
+    The PMF is taken from [1] and the random generator function is adapted from [2].
+
+    .. [1] Consul, P. C., and Felix Famoye. "Generalized Poisson regression model."
+       Communications in Statistics - Theory and Methods 21.1 (1992): 89-109.
+
+    .. [2] Famoye, Felix. "Generalized Poisson random variate generation." American
+       Journal of Mathematical and Management Sciences 17.3-4 (1997): 219-237.
+    """
+
+    rv_op = generalized_poisson
+
+    @classmethod
+    def dist(cls, mu, lam, **kwargs):
+        mu = at.as_tensor_variable(floatX(mu))
+        lam = at.as_tensor_variable(floatX(lam))
+        return super().dist([mu, lam], **kwargs)
+
+    def moment(rv, size, mu, lam):
+        mean = at.floor(mu / (1 - lam))
+        if not rv_size_is_none(size):
+            mean = at.full(size, mean)
+        return mean
+
+    def logp(value, mu, lam):
+        r"""
+        Calculate log-probability of the Generalized Poisson distribution at a
+        specified value.
+
+        Parameters
+        ----------
+        value : numeric
+            Value(s) for which log-probability is calculated. If the log
+            probabilities for multiple values are desired, the values must be
+            provided in a numpy array or Aesara tensor.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        mu_lam_value = mu + lam * value
+        logprob = np.log(mu) + logpow(mu_lam_value, value - 1) - mu_lam_value - factln(value)
+
+        # Probability is 0 when value > m, where m is the largest positive integer
+        # for which mu + m * lam > 0 (when lam < 0).
+        logprob = at.switch(
+            at.or_(
+                mu_lam_value < 0,
+                value < 0,
+            ),
+            -np.inf,
+            logprob,
+        )
+
+        return check_parameters(
+            logprob,
+            0 < mu,
+            at.abs(lam) <= 1,
+            (-mu / 4) <= lam,
+            msg="0 < mu, max(-1, -mu/4) <= lam <= 1",
+        )
+
+
 class NegativeBinomial(Discrete):
     R"""
     Negative binomial log-likelihood.
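
The rng_fn above is Famoye's inversion sampler: it accumulates the running CDF on the log scale with np.logaddexp, advancing each draw until the CDF crosses its uniform variate, and it steps the pmf with the ratio f(x) / f(x-1) = (theta + lam*(x-1)) * ((theta + lam*x) / (theta + lam*(x-1)))**(x-1) * exp(-lam) / x, which is exactly what the log_c and log1p_lam_m_C terms compute. The pmf itself is easy to sanity-check outside PyMC; the helper below is a hypothetical sketch using only numpy and scipy, not part of this commit:

    import numpy as np
    from scipy import stats
    from scipy.special import gammaln

    def gp_logpmf(x, mu, lam):
        # log f(x | mu, lam) = log(mu) + (x - 1) log(mu + lam x) - (mu + lam x) - log(x!)
        mu_lam_x = mu + lam * x
        return np.log(mu) + (x - 1) * np.log(mu_lam_x) - mu_lam_x - gammaln(x + 1)

    x = np.arange(200)

    # At lam = 0 the pmf reduces to the standard Poisson, as stated in the Notes
    np.testing.assert_allclose(gp_logpmf(x, mu=5.0, lam=0.0), stats.poisson.logpmf(x, 5.0))

    # For 0 < lam < 1 the pmf is a proper distribution (up to truncation error)
    np.testing.assert_allclose(np.exp(gp_logpmf(x, mu=5.0, lam=0.3)).sum(), 1.0, rtol=1e-9)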

pymc/tests/test_distributions.py

Lines changed: 48 additions & 0 deletions
@@ -81,6 +81,7 @@ def polyagamma_cdf(*args, **kwargs):
     Exponential,
     Flat,
     Gamma,
+    GeneralizedPoisson,
     Geometric,
     Gumbel,
     HalfCauchy,
@@ -1742,6 +1743,53 @@ def test_poisson(self):
             {"mu": Rplus},
         )

+    def test_generalized_poisson(self):
+        # We only check this distribution at lambda = 0, where it is equivalent to the Poisson.
+        self.check_logp(
+            GeneralizedPoisson,
+            Nat,
+            {"mu": Rplus, "lam": Domain([0], edges=(None, None))},
+            lambda value, mu, lam: sp.poisson.logpmf(value, mu),
+            skip_paramdomain_outside_edge_test=True,
+        )
+
+        value = at.scalar("value")
+        mu = at.scalar("mu")
+        lam = at.scalar("lam")
+        logp = pm.logp(GeneralizedPoisson.dist(mu, lam), value)
+        logp_fn = aesara.function([value, mu, lam], logp)
+
+        # Check out-of-bounds values
+        assert logp_fn(-1, mu=5, lam=0) == -np.inf
+        assert logp_fn(9, mu=5, lam=-1) == -np.inf
+
+        # Check mu/lam restrictions
+        with pytest.raises(ParameterValueError):
+            logp_fn(1, mu=1, lam=2)
+
+        with pytest.raises(ParameterValueError):
+            logp_fn(1, mu=0, lam=0)
+
+        with pytest.raises(ParameterValueError):
+            logp_fn(1, mu=1, lam=-1)
+
+    def test_generalized_poisson_lam_expected_moments(self):
+        # TODO: This is a costly test; we should find a cheaper way to test the logp
+        mu = 30
+        lam = np.array([-0.9, -0.7, -0.2, 0, 0.2, 0.7, 0.9])
+        with Model():
+            # We create separate dists because the Metropolis sampler cannot find
+            # a good single step size to accommodate all lambda values
+            dist = [pm.GeneralizedPoisson(f"x_{l}", mu=mu, lam=l) for l in lam]
+            pm.Deterministic("x", at.stack(dist))
+            trace = pm.sample(return_inferencedata=False, chains=1, draws=10_000)
+
+        expected_mean = mu / (1 - lam)
+        np.testing.assert_allclose(trace["x"].mean(0), expected_mean, rtol=1e-1)
+
+        expected_std = np.sqrt(mu / (1 - lam) ** 3)
+        np.testing.assert_allclose(trace["x"].std(0), expected_std, rtol=1e-1)
+
     def test_constantdist(self):
         self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value))
         self.check_logcdf(Constant, I, {"c": I}, lambda value, c: np.log(value >= c))
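
The MCMC-based moment test above is costly, as its TODO notes. The same closed-form mean and standard deviation can be spot-checked far more cheaply by forward sampling alone, mirroring the dist(...).eval() pattern of the random-generation tests further down; a minimal sketch, assuming this commit is installed:

    import numpy as np
    import pymc as pm

    mu, lam = 10.0, 0.4
    draws = pm.GeneralizedPoisson.dist(mu=mu, lam=lam, size=50_000).eval()

    # Closed-form moments from the docstring: mean = mu/(1-lam), var = mu/(1-lam)**3
    np.testing.assert_allclose(draws.mean(), mu / (1 - lam), rtol=1e-1)
    np.testing.assert_allclose(draws.std(), np.sqrt(mu / (1 - lam) ** 3), rtol=1e-1)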

pymc/tests/test_distributions_moments.py

Lines changed: 14 additions & 0 deletions
@@ -28,6 +28,7 @@
     Exponential,
     Flat,
     Gamma,
+    GeneralizedPoisson,
     Geometric,
     Gumbel,
     HalfCauchy,
@@ -592,6 +593,19 @@ def test_poisson_moment(mu, size, expected):
     assert_moment_is_expected(model, expected)


+@pytest.mark.parametrize(
+    "mu, lam, size, expected",
+    [
+        (50, [-0.6, 0, 0.6], None, np.floor(50 / (1 - np.array([-0.6, 0, 0.6])))),
+        ([5, 50], -0.1, (4, 2), np.full((4, 2), np.floor(np.array([5, 50]) / 1.1))),
+    ],
+)
+def test_generalized_poisson_moment(mu, lam, size, expected):
+    with Model() as model:
+        GeneralizedPoisson("x", mu=mu, lam=lam, size=size)
+    assert_moment_is_expected(model, expected)
+
+
 @pytest.mark.parametrize(
     "n, p, size, expected",
     [
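
The second parametrized case relies on numpy broadcasting a vector mu against a scalar lam and tiling the result to an explicit size; the expected array can be reproduced outside the test harness (a sketch, not part of the commit):

    import numpy as np

    mu, lam, size = np.array([5, 50]), -0.1, (4, 2)

    # moment() computes floor(mu / (1 - lam)) and tiles it to the requested size
    expected = np.full(size, np.floor(mu / (1 - lam)))
    print(expected)  # four rows of [ 4., 45.]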

pymc/tests/test_distributions_random.py

Lines changed: 30 additions & 0 deletions
@@ -972,6 +972,36 @@ class TestPoisson(BaseTestDistributionRandom):
     checks_to_run = ["check_pymc_params_match_rv_op"]


+class TestGeneralizedPoisson(BaseTestDistributionRandom):
+    pymc_dist = pm.GeneralizedPoisson
+    pymc_dist_params = {"mu": 4.0, "lam": 1.0}
+    expected_rv_op_params = {"mu": 4.0, "lam": 1.0}
+    checks_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_rv_size",
+    ]
+
+    def test_random(self):
+        pymc_random_discrete(
+            dist=self.pymc_dist,
+            paramdomains={"mu": Rplus, "lam": Domain([0], edges=(None, None))},
+            ref_rand=lambda mu, lam, size: st.poisson.rvs(mu, size=size),
+        )
+
+    def test_random_lam_expected_moments(self):
+        mu = 50
+        lam = np.array([-0.9, -0.7, -0.2, 0, 0.2, 0.7, 0.9])
+
+        dist = self.pymc_dist.dist(mu=mu, lam=lam, size=(10_000, len(lam)))
+        draws = dist.eval()
+
+        expected_mean = mu / (1 - lam)
+        np.testing.assert_allclose(draws.mean(0), expected_mean, rtol=1e-1)
+
+        expected_std = np.sqrt(mu / (1 - lam) ** 3)
+        np.testing.assert_allclose(draws.std(0), expected_std, rtol=1e-1)
+
+
 class TestMvNormalCov(BaseTestDistributionRandom):
     pymc_dist = pm.MvNormal
     pymc_dist_params = {
