
Commit 8f0ffb3

Refactor EulerMaruyama to work in v4
1 parent 98aadfd

2 files changed: +175 -64 lines changed


pymc/distributions/timeseries.py

Lines changed: 141 additions & 30 deletions
@@ -15,7 +15,7 @@
 import warnings
 
 from abc import ABCMeta
-from typing import Optional
+from typing import Callable, Optional
 
 import aesara
 import aesara.tensor as at
@@ -881,7 +881,26 @@ def garch11_moment(op, rv, omega, alpha_1, beta_1, initial_vol, init_dist, steps
     return at.zeros_like(rv)
 
 
-class EulerMaruyama(distribution.Continuous):
+class EulerMaruyamaRV(SymbolicRandomVariable):
+    """A placeholder used to specify a log-likelihood for an EulerMaruyama sub-graph."""
+
+    default_output = 1
+    dt: float
+    sde_fn: Callable
+    _print_name = ("EulerMaruyama", "\\operatorname{EulerMaruyama}")
+
+    def __init__(self, *args, dt, sde_fn, **kwargs):
+        self.dt = dt
+        self.sde_fn = sde_fn
+        super().__init__(*args, **kwargs)
+
+    def update(self, node: Node):
+        """Return the update mapping for the noise RV."""
+        # Since noise is a shared variable it shows up as the last node input
+        return {node.inputs[-1]: node.outputs[0]}
+
+
+class EulerMaruyama(Distribution):
     r"""
     Stochastic differential equation discretized with the Euler-Maruyama method.
 
@@ -893,39 +912,131 @@ class EulerMaruyama(distribution.Continuous):
         function returning the drift and diffusion coefficients of SDE
     sde_pars: tuple
         parameters of the SDE, passed as ``*args`` to ``sde_fn``
+    init_dist : unnamed distribution, optional
+        Scalar or vector distribution for initial values. Unnamed refers to distributions
+        created with the ``.dist()`` API. Distributions should have shape ``(*shape[:-1],)``.
+        If not, it will be automatically resized. Defaults to pm.Normal.dist(0, 100, shape=...).
+
+        .. warning:: init_dist will be cloned, rendering it independent of the one passed as input.
     """
 
-    def __new__(cls, *args, **kwargs):
-        raise NotImplementedError(f"{cls.__name__} has not yet been ported to PyMC 4.0.")
+    rv_type = EulerMaruyamaRV
+
+    def __new__(cls, name, dt, sde_fn, *args, steps=None, **kwargs):
+        dt = at.as_tensor_variable(floatX(dt))
+        steps = get_support_shape_1d(
+            support_shape=steps,
+            shape=None,  # Shape will be checked in `cls.dist`
+            dims=kwargs.get("dims", None),
+            observed=kwargs.get("observed", None),
+            support_shape_offset=1,
+        )
+        return super().__new__(cls, name, dt, sde_fn, *args, steps=steps, **kwargs)
 
     @classmethod
-    def dist(cls, *args, **kwargs):
-        raise NotImplementedError(f"{cls.__name__} has not yet been ported to PyMC 4.0.")
+    def dist(cls, dt, sde_fn, sde_pars, *, init_dist=None, steps=None, **kwargs):
+        steps = get_support_shape_1d(
+            support_shape=steps, shape=kwargs.get("shape", None), support_shape_offset=1
+        )
+        if steps is None:
+            raise ValueError("Must specify steps or shape parameter")
+        steps = at.as_tensor_variable(intX(steps), ndim=0)
 
-    def __init__(self, dt, sde_fn, sde_pars, *args, **kwds):
-        super().__init__(*args, **kwds)
-        self.dt = dt = at.as_tensor_variable(dt)
-        self.sde_fn = sde_fn
-        self.sde_pars = sde_pars
+        dt = at.as_tensor_variable(floatX(dt))
+        sde_pars = [at.as_tensor_variable(x) for x in sde_pars]
 
-    def logp(self, x):
-        """
-        Calculate log-probability of EulerMaruyama distribution at specified value.
+        if init_dist is not None:
+            if not isinstance(init_dist, TensorVariable) or not isinstance(
+                init_dist.owner.op, (RandomVariable, SymbolicRandomVariable)
+            ):
+                raise ValueError(
+                    f"Init dist must be a distribution created via the `.dist()` API, "
+                    f"got {type(init_dist)}"
+                )
+            check_dist_not_registered(init_dist)
+            if init_dist.owner.op.ndim_supp > 1:
+                raise ValueError(
+                    "Init distribution must have a scalar or vector support dimension, ",
+                    f"got ndim_supp={init_dist.owner.op.ndim_supp}.",
+                )
+        else:
+            warnings.warn(
+                "Initial distribution not specified, defaulting to "
+                "`Normal.dist(0, 100, shape=...)`. You can specify an init_dist "
+                "manually to suppress this warning.",
+                UserWarning,
+            )
+            init_dist = Normal.dist(0, 100, shape=sde_pars[0].shape)
+        # Tell Aeppl to ignore init_dist, as it will be accounted for in the logp term
+        init_dist = ignore_logprob(init_dist)
 
-        Parameters
-        ----------
-        x: numeric
-            Value for which log-probability is calculated.
+        return super().dist([dt, sde_fn, sde_pars, init_dist, steps], **kwargs)
 
-        Returns
-        -------
-        TensorVariable
-        """
-        xt = x[:-1]
-        f, g = self.sde_fn(x[:-1], *self.sde_pars)
-        mu = xt + self.dt * f
-        sigma = at.sqrt(self.dt) * g
-        return at.sum(Normal.dist(mu=mu, sigma=sigma).logp(x[1:]))
-
-    def _distr_parameters_for_repr(self):
-        return ["dt"]
+    @classmethod
+    def rv_op(cls, dt, sde_fn, sde_pars, init_dist, steps, size=None):
+        # Init dist should have batch shape (*size,)
+        if size is not None:
+            batch_size = size
+        else:
+            # In this case the size of the init_dist depends on the parameters shape
+            # The last dimension of init_dist does not matter
+            batch_size = at.broadcast_shape(*sde_pars, at.atleast_1d(init_dist)[..., 0])
+        init_dist = change_dist_size(init_dist, batch_size)
+
+        # Create OpFromGraph representing random draws from the SDE process
+        # Variables with underscore suffix are dummy inputs into the OpFromGraph
+        init_ = init_dist.type()
+        sde_pars_ = [x.type() for x in sde_pars]
+        steps_ = steps.type()
+
+        noise_rng = aesara.shared(np.random.default_rng())
+
+        def step(*prev_args):
+            prev_y, *prev_sde_pars, rng = prev_args
+            f, g = sde_fn(prev_y, *prev_sde_pars)
+            mu = prev_y + dt * f
+            sigma = at.sqrt(dt) * g
+            next_rng, next_y = Normal.dist(mu=mu, sigma=sigma, rng=rng).owner.outputs
+            return next_y, {rng: next_rng}
+
+        y_t, innov_updates_ = aesara.scan(
+            fn=step,
+            outputs_info=[init_],
+            non_sequences=sde_pars_ + [noise_rng],
+            n_steps=steps_,
+            strict=True,
+        )
+        (noise_next_rng,) = tuple(innov_updates_.values())
+
+        sde_out_ = at.concatenate([init_[None, ...], y_t], axis=0).dimshuffle(
+            tuple(range(1, y_t.ndim)) + (0,)
+        )
+
+        eulermaruyama_op = EulerMaruyamaRV(
+            inputs=[init_, steps_] + sde_pars_,
+            outputs=[noise_next_rng, sde_out_],
+            dt=dt,
+            sde_fn=sde_fn,
+            ndim_supp=1,
+        )
+
+        eulermaruyama = eulermaruyama_op(init_dist, steps, *sde_pars)
+        return eulermaruyama
+
+
+@_logprob.register(EulerMaruyamaRV)
+def eulermaruyama_logp(op, values, init_dist, steps, *sde_pars_noise_arg, **kwargs):
+    (x,) = values
+    # noise arg is unused, but is needed to make the logp signature match the rv_op signature
+    *sde_pars, _ = sde_pars_noise_arg
+    xtm1 = x[..., :-1]
+    xt = x[..., 1:]
+    f, g = op.sde_fn(xtm1, *sde_pars)
+    mu = xtm1 + op.dt * f
+    sigma = at.sqrt(op.dt) * g
+    # Compute and collapse logp across time dimension
+    sde_logp = at.sum(logp(Normal.dist(mu, sigma), xt), axis=-1)
+    init_logp = logp(init_dist, x[..., :1])
+    if init_dist.owner.op.ndim_supp == 0:
+        init_logp = at.sum(init_logp, axis=-1)
+    return init_logp + sde_logp
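
For reference, the forward `step` function and `eulermaruyama_logp` above are two views of the same discretization. For an SDE dX = f(X, theta) dt + g(X, theta) dW, the Euler-Maruyama transition implemented here is, in LaTeX notation,

    x_{t+1} = x_t + f(x_t, \theta)\,\Delta t + \sqrt{\Delta t}\,g(x_t, \theta)\,w_t,
    \qquad w_t \sim \mathcal{N}(0, 1),

which is equivalent to the Gaussian transition density

    x_{t+1} \mid x_t \sim \mathcal{N}\!\big(x_t + f(x_t, \theta)\,\Delta t,\; g(x_t, \theta)^2\,\Delta t\big).

This is exactly the `mu = prev_y + dt * f` / `sigma = at.sqrt(dt) * g` pair used in both the scan step and the logp; the total log-density adds the `init_dist` term for x_0 and sums the transition terms over the time axis.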

pymc/tests/distributions/test_timeseries.py

Lines changed: 34 additions & 34 deletions
@@ -830,37 +830,37 @@ def test_change_dist_size(self):
         assert new_dist.eval().shape == (4, 3, 10)
 
 
-def _gen_sde_path(sde, pars, dt, n, x0):
-    xs = [x0]
-    wt = np.random.normal(size=(n,) if isinstance(x0, float) else (n, x0.size))
-    for i in range(n):
-        f, g = sde(xs[-1], *pars)
-        xs.append(xs[-1] + f * dt + np.sqrt(dt) * g * wt[i])
-    return np.array(xs)
-
-
-@pytest.mark.xfail(reason="Euleryama not refactored", raises=NotImplementedError)
-def test_linear():
-    lam = -0.78
-    sig2 = 5e-3
-    N = 300
-    dt = 1e-1
-    sde = lambda x, lam: (lam * x, sig2)
-    x = floatX(_gen_sde_path(sde, (lam,), dt, N, 5.0))
-    z = x + np.random.randn(x.size) * sig2
-    # build model
-    with Model() as model:
-        lamh = Flat("lamh")
-        xh = EulerMaruyama("xh", dt, sde, (lamh,), shape=N + 1, initval=x)
-        Normal("zh", mu=xh, sigma=sig2, observed=z)
-    # invert
-    with model:
-        trace = sample(init="advi+adapt_diag", chains=1)
-
-    ppc = sample_posterior_predictive(trace, model=model)
-
-    p95 = [2.5, 97.5]
-    lo, hi = np.percentile(trace[lamh], p95, axis=0)
-    assert (lo < lam) and (lam < hi)
-    lo, hi = np.percentile(ppc["zh"], p95, axis=0)
-    assert ((lo < z) * (z < hi)).mean() > 0.95
+class TestEulerMaruyama:
+
+    def _gen_sde_path(self, sde, pars, dt, n, x0):
+        xs = [x0]
+        wt = np.random.normal(size=(n,) if isinstance(x0, float) else (n, x0.size))
+        for i in range(n):
+            f, g = sde(xs[-1], *pars)
+            xs.append(xs[-1] + f * dt + np.sqrt(dt) * g * wt[i])
+        return np.array(xs)
+
+    def test_linear(self):
+        lam = -0.78
+        sig2 = 5e-3
+        N = 300
+        dt = 1e-1
+        sde = lambda x, lam: (lam * x, sig2)
+        x = floatX(self._gen_sde_path(sde, (lam,), dt, N, 5.0))
+        z = x + np.random.randn(x.size) * sig2
+        # build model
+        with Model() as model:
+            lamh = Flat("lamh")
+            xh = EulerMaruyama("xh", dt, sde, (lamh,), steps=N, initval=x)
+            Normal("zh", mu=xh, sigma=sig2, observed=z)
+        # invert
+        with model:
+            trace = sample(chains=1)
+
+        ppc = sample_posterior_predictive(trace, model=model)
+
+        p95 = [2.5, 97.5]
+        lo, hi = np.percentile(trace.posterior["lamh"], p95, axis=[0, 1])
+        assert (lo < lam) and (lam < hi)
+        lo, hi = np.percentile(ppc.posterior_predictive["zh"], p95, axis=[0, 1])
+        assert ((lo < z) * (z < hi)).mean() > 0.95
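
Outside the test-suite, the refactored API can be exercised as below. This is a minimal sketch assuming the v4-era package at this commit (imported as `pymc`, with `EulerMaruyama` exported at the top level); the SDE function, parameter values, and variable names are illustrative only, not taken from the diff:

import pymc as pm

# Linear drift with constant diffusion, mirroring the test above (illustrative values)
sde = lambda x, lam: (lam * x, 5e-3)

with pm.Model() as model:
    lam = pm.Normal("lam", 0, 1)
    # steps=100 transitions give 101 time points; the first is drawn from init_dist
    path = pm.EulerMaruyama(
        "path", 0.1, sde, (lam,), init_dist=pm.Normal.dist(0, 1), steps=100
    )
    # Forward draws go through the scan-based rv_op; logp uses eulermaruyama_logp
    prior = pm.sample_prior_predictive(samples=10)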
