Improve sample_ppc (#3053)

Junpeng Lao · web-flow · commit c496378db707 · 2018-06-25T19:39:33.000+02:00
* Improve sample_ppc: the shape kwarg no longer needs to be specified on an ObservedRV for sample_ppc to work. Also closes #3012. * Fix test and also improve sample_ppc_w * Fix test * shape is not needed in test
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
@@ -383,14 +383,23 @@ def _draw_value(param, point=None, givens=None, size=None):
         elif (hasattr(param, 'distribution') and
                 hasattr(param.distribution, 'random') and
                 param.distribution.random is not None):
-            # reset the dist shape for ObservedRV
             if hasattr(param, 'observations'):
+                # shape inspection for ObservedRV
                 dist_tmp = param.distribution
                 try:
                     distshape = param.observations.shape.eval()
                 except AttributeError:
                     distshape = param.observations.shape
+
                 dist_tmp.shape = distshape
+                try:
+                    dist_tmp.random(point=point, size=size)
+                except (ValueError, TypeError):
+                    # reset shape to account for shape changes
+                    # with theano.shared inputs
+                    dist_tmp.shape = np.array([])
+                    val = dist_tmp.random(point=point, size=None)
+                    dist_tmp.shape = val.shape
                 return dist_tmp.random(point=point, size=size)
             else:
                 return param.distribution.random(point=point, size=size)
diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
@@ -159,10 +159,10 @@ def random_choice(*args, **kwargs):
 
         w = draw_values([self.w], point=point)[0]
         comp_tmp = self._comp_samples(point=point, size=None)
-        if self.shape.size == 0:
+        if np.asarray(self.shape).size == 0:
             distshape = np.asarray(np.broadcast(w, comp_tmp).shape)[..., :-1]
         else:
-            distshape = self.shape
+            distshape = np.asarray(self.shape)
         w_samples = generate_samples(random_choice,
                                      w=w,
                                      broadcast_shape=w.shape[:-1] or (1,),
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
@@ -982,7 +982,8 @@ def _mp_sample(draws, tune, step, chains, cores, chain, random_seed,
                 strace = _choose_backend(copy(trace), idx, model=model)
             else:
                 strace = _choose_backend(None, idx, model=model)
-            # TODO what is this for?
+            # for user-supplied start values, fill in any missing values if the
+            # supplied dict does not contain all parameters
             update_start_vals(start[idx - chain], model.test_point, model)
             if step.generates_stats and strace.supports_sampler_stats:
                 strace.setup(draws + tune, idx + chain, step.stats_dtypes)
@@ -1121,18 +1122,26 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
     if progressbar:
         indices = tqdm(indices, total=samples)
 
+    varnames = [var.name for var in vars]
+
+    # draw once to inspect the shape
+    var_values = list(zip(varnames,
+                          draw_values(vars, point=model.test_point, size=size)))
+    ppc_trace = defaultdict(list)
+    for varname, value in var_values:
+        ppc_trace[varname] = np.zeros((samples,) + value.shape, value.dtype)
+
     try:
-        ppc = defaultdict(list)
-        for idx in indices:
+        for slc, idx in enumerate(indices):
             if nchain > 1:
                 chain_idx, point_idx = np.divmod(idx, len_trace)
                 param = trace._straces[chain_idx].point(point_idx)
             else:
                 param = trace[idx]
 
-            for var in vars:
-                ppc[var.name].append(var.distribution.random(point=param,
-                                                             size=size))
+            values = draw_values(vars, point=param, size=size)
+            for k, v in zip(vars, values):
+                ppc_trace[k.name][slc] = v
 
     except KeyboardInterrupt:
         pass
@@ -1141,7 +1150,7 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None,
         if progressbar:
             indices.close()
 
-    return {k: np.asarray(v) for k, v in ppc.items()}
+    return ppc_trace
 
 
 def sample_ppc_w(traces, samples=None, models=None, weights=None,
@@ -1259,8 +1268,12 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None,
         for idx in indices:
             param = trace[idx]
             var = variables[idx]
-            ppc[var.name].append(var.distribution.random(point=param,
-                                                         size=size[idx]))
+            # TODO sample_ppc_w currently only works for models with
+            # one observed variable.
+            ppc[var.name].append(draw_values([var],
+                                             point=param,
+                                             size=size[idx]
+                                             )[0])
 
     except KeyboardInterrupt:
         pass
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -10,6 +10,7 @@
 import theano
 
 import pymc3 as pm
+from pymc3.distributions.distribution import draw_values
 from .helpers import SeededTest
 from .test_distributions import (
     build_model, Domain, product, R, Rplus, Rplusbig, Rplusdunif,
@@ -74,7 +75,7 @@ class TestDrawValues(SeededTest):
     def test_draw_scalar_parameters(self):
         with pm.Model():
             y = pm.Normal('y1', mu=0., sd=1.)
-            mu, tau = pm.distributions.draw_values([y.distribution.mu, y.distribution.tau])
+            mu, tau = draw_values([y.distribution.mu, y.distribution.tau])
         npt.assert_almost_equal(mu, 0)
         npt.assert_almost_equal(tau, 1)
 
@@ -83,7 +84,7 @@ def test_draw_dependencies(self):
             x = pm.Normal('x', mu=0., sd=1.)
             exp_x = pm.Deterministic('exp_x', pm.math.exp(x))
 
-        x, exp_x = pm.distributions.draw_values([x, exp_x])
+        x, exp_x = draw_values([x, exp_x])
         npt.assert_almost_equal(np.exp(x), exp_x)
 
     def test_draw_order(self):
@@ -92,15 +93,15 @@ def test_draw_order(self):
             exp_x = pm.Deterministic('exp_x', pm.math.exp(x))
 
         # Need to draw x before drawing log_x
-        exp_x, x = pm.distributions.draw_values([exp_x, x])
+        exp_x, x = draw_values([exp_x, x])
         npt.assert_almost_equal(np.exp(x), exp_x)
 
     def test_draw_point_replacement(self):
         with pm.Model():
             mu = pm.Normal('mu', mu=0., tau=1e-3)
             sigma = pm.Gamma('sigma', alpha=1., beta=1., transform=None)
             y = pm.Normal('y', mu=mu, sd=sigma)
-            mu2, tau2 = pm.distributions.draw_values([y.distribution.mu, y.distribution.tau],
+            mu2, tau2 = draw_values([y.distribution.mu, y.distribution.tau],
                                                      point={'mu': 5., 'sigma': 2.})
         npt.assert_almost_equal(mu2, 5)
         npt.assert_almost_equal(tau2, 1 / 2.**2)
@@ -110,7 +111,7 @@ def test_random_sample_returns_nd_array(self):
             mu = pm.Normal('mu', mu=0., tau=1e-3)
             sigma = pm.Gamma('sigma', alpha=1., beta=1., transform=None)
             y = pm.Normal('y', mu=mu, sd=sigma)
-            mu, tau = pm.distributions.draw_values([y.distribution.mu, y.distribution.tau])
+            mu, tau = draw_values([y.distribution.mu, y.distribution.tau])
         assert isinstance(mu, np.ndarray)
         assert isinstance(tau, np.ndarray)
 
@@ -806,15 +807,15 @@ def test_mixture_random_shape():
         like0 = pm.Mixture('like0',
                            w=w0,
                            comp_dists=comp0,
-                           shape=y.shape,
                            observed=y)
 
         comp1 = pm.Poisson.dist(mu=np.ones((20, 2)),
                                 shape=(20, 2))
         w1 = pm.Dirichlet('w1', a=np.ones(2))
         like1 = pm.Mixture('like1',
                            w=w1,
-                           comp_dists=comp1, observed=y)
+                           comp_dists=comp1,
+                           observed=y)
 
         comp2 = pm.Poisson.dist(mu=np.ones(2))
         w2 = pm.Dirichlet('w2',
@@ -835,16 +836,12 @@ def test_mixture_random_shape():
                            comp_dists=comp3,
                            observed=y)
 
-    rand0 = like0.distribution.random(m.test_point, size=100)
+    rand0, rand1, rand2, rand3 = draw_values([like0, like1, like2, like3],
+                                             point=m.test_point,
+                                             size=100)
     assert rand0.shape == (100, 20)
-
-    rand1 = like1.distribution.random(m.test_point, size=100)
     assert rand1.shape == (100, 20)
-
-    rand2 = like2.distribution.random(m.test_point, size=100)
     assert rand2.shape == (100, 20)
-
-    rand3 = like3.distribution.random(m.test_point, size=100)
     assert rand3.shape == (100, 20)
 
     with m:
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
@@ -214,7 +214,8 @@ def test_choose_backend_shortcut(self):
 class TestSamplePPC(SeededTest):
     def test_normal_scalar(self):
         with pm.Model() as model:
-            a = pm.Normal('a', mu=0, sd=1)
+            mu = pm.Normal('mu', 0., 1.)
+            a = pm.Normal('a', mu=mu, sd=1, observed=0.)
             trace = pm.sample()
 
         with model:
@@ -225,7 +226,8 @@ def test_normal_scalar(self):
             ppc = pm.sample_ppc(trace, samples=1000, vars=[a])
             assert 'a' in ppc
             assert ppc['a'].shape == (1000,)
-        _, pval = stats.kstest(ppc['a'], stats.norm().cdf)
+        _, pval = stats.kstest(ppc['a'],
+                               stats.norm(loc=0, scale=np.sqrt(2)).cdf)
         assert pval > 0.001
 
         with model:
@@ -234,7 +236,9 @@ def test_normal_scalar(self):
 
     def test_normal_vector(self):
         with pm.Model() as model:
-            a = pm.Normal('a', mu=0, sd=1, shape=2)
+            mu = pm.Normal('mu', 0., 1.)
+            a = pm.Normal('a', mu=mu, sd=1,
+                          observed=np.array([.5, .2]))
             trace = pm.sample()
 
         with model:
@@ -251,16 +255,9 @@ def test_normal_vector(self):
             assert ppc['a'].shape == (10, 4, 2)
 
     def test_vector_observed(self):
-        # This test was initially created to test whether observedRVs
-        # can assert the shape automatically from the observed data.
-        # It can make sample_ppc correct for RVs similar to below (i.e.,
-        # some kind of broadcasting is involved). However, doing so makes
-        # the application with `theano.shared` array as observed data
-        # invalid (after the `.set_value` the RV shape could change).
         with pm.Model() as model:
             mu = pm.Normal('mu', mu=0, sd=1)
             a = pm.Normal('a', mu=mu, sd=1,
-                          shape=2,  # necessary to make ppc sample correct
                           observed=np.array([0., 1.]))
             trace = pm.sample()
 
@@ -300,12 +297,12 @@ def test_sample_ppc_w(self):
 
         with pm.Model() as model_0:
             mu = pm.Normal('mu', mu=0, sd=1)
-            y = pm.Normal('y', mu=mu, sd=1, observed=data0, shape=500)
+            y = pm.Normal('y', mu=mu, sd=1, observed=data0)
             trace_0 = pm.sample()
 
         with pm.Model() as model_1:
             mu = pm.Normal('mu', mu=0, sd=1, shape=len(data0))
-            y = pm.Normal('y', mu=mu, sd=1, observed=data0, shape=500)
+            y = pm.Normal('y', mu=mu, sd=1, observed=data0)
             trace_1 = pm.sample()
 
         traces = [trace_0, trace_0]
diff --git a/pymc3/tests/test_shared.py b/pymc3/tests/test_shared.py
@@ -12,7 +12,7 @@ def test_deterministic(self):
             pm.Normal('y', 0, 1, observed=X)
             model.logp(model.test_point)
 
-    def test_sample_ppc(self):
+    def test_sample(self):
         x = np.random.normal(size=100)
         y = x + np.random.normal(scale=1e-2, size=100)
 
@@ -23,10 +23,19 @@ def test_sample_ppc(self):
         with pm.Model() as model:
             b = pm.Normal('b', 0., 10.)
             pm.Normal('obs', b * x_shared, np.sqrt(1e-2), observed=y)
+            prior_trace0 = pm.sample_prior_predictive(1000)
 
             trace = pm.sample(1000, init=None, progressbar=False)
+            pp_trace0 = pm.sample_ppc(trace, 1000)
 
             x_shared.set_value(x_pred)
-            pp_trace = pm.sample_ppc(trace, 1000)
+            prior_trace1 = pm.sample_prior_predictive(1000)
+            pp_trace1 = pm.sample_ppc(trace, 1000)
 
-        np.testing.assert_allclose(x_pred, pp_trace['obs'].mean(axis=0), atol=1e-1)
+        assert prior_trace0['b'].shape == (1000,)
+        assert prior_trace0['obs'].shape == (1000, 100)
+        np.testing.assert_allclose(x, pp_trace0['obs'].mean(axis=0), atol=1e-1)
+
+        assert prior_trace1['b'].shape == (1000,)
+        assert prior_trace1['obs'].shape == (1000, 200)
+        np.testing.assert_allclose(x_pred, pp_trace1['obs'].mean(axis=0), atol=1e-1)