Improve multinomial moment (#6933)

aerubanov · ricardoV94 · web-flow · commit 827918b42720 · 2023-10-14T16:04:40.000+01:00
Co-authored-by: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com>
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
@@ -540,14 +540,24 @@ def dist(cls, n, p, *args, **kwargs):
 
     def moment(rv, size, n, p):
         n = pt.shape_padright(n)
-        mode = pt.round(n * p)
+        mean = n * p
+        mode = pt.round(mean)
+        # Rounding may leave sum(mode) != n; add the difference back as a correction.
+        # We apply it to the entry with the highest mean to minimize chances of negative values.
         diff = n - pt.sum(mode, axis=-1, keepdims=True)
-        inc_bool_arr = pt.abs(diff) > 0
-        mode = pt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()])
+        max_elem_idx = pt.argmax(mean, axis=-1, keepdims=True)
+        mode = pt.inc_subtensor(
+            pt.take_along_axis(mode, max_elem_idx, axis=-1),
+            diff,
+        )
         if not rv_size_is_none(size):
             output_size = pt.concatenate([size, [p.shape[-1]]])
             mode = pt.full(output_size, mode)
-        return mode
+        return Assert(
+            "Negative value in computed moment of Multinomial. "
+            "It is a known limitation that can arise when the expected largest count is small. "
+            "Please provide an initial value manually."
+        )(mode, pt.all(mode >= 0))
 
     def logp(value, n, p):
         """
diff --git a/tests/distributions/test_multivariate.py b/tests/distributions/test_multivariate.py
@@ -1013,18 +1013,18 @@ class TestMoments:
         [
             (np.array([0.25, 0.25, 0.25, 0.25]), 1, None, np.array([1, 0, 0, 0])),
             (np.array([0.3, 0.6, 0.05, 0.05]), 2, None, np.array([1, 1, 0, 0])),
-            (np.array([0.3, 0.6, 0.05, 0.05]), 10, None, np.array([4, 6, 0, 0])),
+            (np.array([0.3, 0.6, 0.05, 0.05]), 10, None, np.array([3, 7, 0, 0])),
             (
                 np.array([[0.3, 0.6, 0.05, 0.05], [0.25, 0.25, 0.25, 0.25]]),
                 10,
                 None,
-                np.array([[4, 6, 0, 0], [4, 2, 2, 2]]),
+                np.array([[3, 7, 0, 0], [4, 2, 2, 2]]),
             ),
             (
                 np.array([0.3, 0.6, 0.05, 0.05]),
                 np.array([2, 10]),
                 (1, 2),
-                np.array([[[1, 1, 0, 0], [4, 6, 0, 0]]]),
+                np.array([[[1, 1, 0, 0], [3, 7, 0, 0]]]),
             ),
             (
                 np.array([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]]),
@@ -1038,6 +1038,21 @@ class TestMoments:
                 (3, 2),
                 np.full((3, 2, 4), [[1, 0, 0, 0], [2, 3, 3, 2]]),
             ),
+            (
+                np.array([0.0, 0.25, 0.25, 0.25, 0.25]),
+                1,
+                None,
+                np.array([0, 1, 0, 0, 0]),
+            ),
+            pytest.param(
+                np.array([0.1441, 0.1363, 0.1385, 0.1348, 0.1521, 0.1500, 0.1442]),
+                4,
+                None,
+                np.array([1, 1, 1, 1, 0, 0, 0]),
+                marks=pytest.mark.xfail(
+                    raises=AssertionError, reason="Known failure in mode approximation "
+                ),
+            ),
         ],
     )
     def test_multinomial_moment(self, p, n, size, expected):
@@ -1325,12 +1340,12 @@ def test_lkjcholeskycov_moment(self, n, eta, size, expected):
         [
             (np.array([2, 2, 2, 2]), 1, None, np.array([1, 0, 0, 0])),
             (np.array([3, 6, 0.5, 0.5]), 2, None, np.array([1, 1, 0, 0])),
-            (np.array([30, 60, 5, 5]), 10, None, np.array([4, 6, 0, 0])),
+            (np.array([30, 60, 5, 5]), 10, None, np.array([3, 7, 0, 0])),
             (
                 np.array([[30, 60, 5, 5], [26, 26, 26, 22]]),
                 10,
                 (1, 2),
-                np.array([[[4, 6, 0, 0], [2, 3, 3, 2]]]),
+                np.array([[[3, 7, 0, 0], [2, 3, 3, 2]]]),
             ),
             (
                 np.array([26, 26, 26, 22]),