diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index d8bd2239ce..4806c7aa94 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -26,7 +26,7 @@
 - `dist_math.random_choice` now handles nd-arrays of category probabilities, and also handles sizes that are not `None`. Also removed unused `k` kwarg from `dist_math.random_choice`.
 - Changed `Categorical.mode` to preserve all the dimensions of `p` except the last one, which encodes each category's probability.
 - Changed initialization of `Categorical.p`. `p` is now normalized to sum to `1` inside `logp` and `random`, but not during initialization. This could hide negative values supplied to `p` as mentioned in #2082.
-- To be able to test for negative `p` values supplied to `Categorical`, `Categorical.logp` was changed to check for `sum(self.p, axis=-1) == 1` only if `self.p` is not a `Number`, `np.ndarray`, `TensorConstant` or `SharedVariable`. These cases are automatically normalized to sum to `1`. The other condition may originate from a `step_method` proposal, where `self.p` tensor's value may be set, but must sum to 1 nevertheless. This may break old code which intialized `p` with a theano expression and relied on the default normalization to get it to sum to 1. `Categorical.logp` now also checks that the used `p` has values lower than 1.
+- `Categorical` now accepts elements of `p` equal to `0`. `logp` will return `-inf` if there are `values` that index to the zero probability categories.
 
 ### Deprecations
 
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 952f987773..1dbe399363 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -1,6 +1,4 @@
-import numbers
 import numpy as np
-import theano
 import theano.tensor as tt
 from scipy import stats
 import warnings
@@ -736,27 +734,16 @@ def logp(self, value):
         # Clip values before using them for indexing
         value_clip = tt.clip(value, 0, k - 1)
 
-        # We must only check that the values sum to 1 if p comes from a
-        # tensor variable, i.e. when p is a step_method proposal. In the other
-        # cases we normalize ourselves
-        if not isinstance(p_, (numbers.Number,
-                               np.ndarray,
-                               tt.TensorConstant,
-                               tt.sharedvar.SharedVariable)):
-            sumto1 = theano.gradient.zero_grad(
-                tt.le(abs(tt.sum(p_, axis=-1) - 1), 1e-5))
-            p = p_
-        else:
-            p = p_ / tt.sum(p_, axis=-1, keepdims=True)
-            sumto1 = True
+        p = p_ / tt.sum(p_, axis=-1, keepdims=True)
 
         if p.ndim > 1:
-            a = tt.log(np.moveaxis(p, -1, 0)[value_clip])
+            pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
+            a = tt.log(p.dimshuffle(pattern)[value_clip])
         else:
             a = tt.log(p[value_clip])
 
-        return bound(a, value >= 0, value <= (k - 1), sumto1,
-                     tt.all(p_ > 0, axis=-1), tt.all(p <= 1, axis=-1))
+        return bound(a, value >= 0, value <= (k - 1),
+                     tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
 
     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 05eb14ea82..dbb02d36d8 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -349,7 +349,7 @@ def orderedlogistic_logpdf(value, eta, cutpoints):
     ps = np.array([invlogit(eta - cc) - invlogit(eta - cc1)
                    for cc, cc1 in zip(c[:-1], c[1:])])
     p = ps[value]
-    return np.where(np.all(ps > 0), np.log(p), -np.inf)
+    return np.where(np.all(ps >= 0), np.log(p), -np.inf)
 
 class Simplex:
     def __init__(self, n):
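
A minimal sketch (not part of the patch) of the behavior the release note describes, assuming pymc3 at this revision. It uses `pm.Categorical.dist(...)`, the standard way to build a distribution outside a model, to exercise the new `logp` directly:

```python
import numpy as np
import pymc3 as pm

# `p` may now contain exact zeros; it is normalized inside `logp`,
# so it does not have to sum to 1 up front.
d = pm.Categorical.dist(p=np.array([0.2, 0.0, 0.8]))

print(d.logp(0).eval())  # log(0.2), about -1.609
print(d.logp(1).eval())  # -inf: value indexes a zero-probability category

# Unnormalized weights are rescaled by p_ / tt.sum(p_, axis=-1, keepdims=True):
d2 = pm.Categorical.dist(p=np.array([1.0, 1.0, 2.0]))
print(d2.logp(2).eval())  # log(0.5), about -0.693

# Negative entries are still rejected, now by the tt.all(p_ >= 0) bound:
d3 = pm.Categorical.dist(p=np.array([1.5, -0.5]))
print(d3.logp(0).eval())  # -inf
```

Note the bound changed from `tt.all(p_ > 0)` to `tt.all(p_ >= 0)`: zero is a legal category probability, and `tt.log` maps it to `-inf` only for the values that actually hit it, rather than invalidating the whole distribution.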