From 52ecccf9d6f778821084b7daac739bd4b1b3d4bc Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 5 Sep 2022 08:30:38 +0200 Subject: [PATCH 01/13] Fix missing imports in tests --- pymc/tests/distributions/test_multivariate.py | 1 + pymc/tests/distributions/util.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc/tests/distributions/test_multivariate.py b/pymc/tests/distributions/test_multivariate.py index e1d1d24298..df562d00da 100644 --- a/pymc/tests/distributions/test_multivariate.py +++ b/pymc/tests/distributions/test_multivariate.py @@ -52,6 +52,7 @@ Vector, assert_moment_is_expected, check_logp, + pymc_random, seeded_numpy_distribution_builder, ) from pymc.tests.helpers import select_by_precision diff --git a/pymc/tests/distributions/util.py b/pymc/tests/distributions/util.py index 2f25c77f9d..2f83fdc777 100644 --- a/pymc/tests/distributions/util.py +++ b/pymc/tests/distributions/util.py @@ -18,7 +18,7 @@ import pymc as pm -from pymc.aesaraf import change_rv_size, compile_pymc, floatX +from pymc.aesaraf import change_rv_size, compile_pymc, floatX, intX from pymc.distributions import logcdf, logp from pymc.distributions.logprob import joint_logp from pymc.initial_point import make_initial_point_fn From c8404548202c09e6e44bbeffb7388031e93f98a1 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 5 Sep 2022 07:55:37 +0200 Subject: [PATCH 02/13] Fix name error in some BaseTestDistributionRandom * This meant that the intended checks were not being run --- pymc/tests/distributions/test_discrete.py | 7 +++++-- pymc/tests/distributions/test_multivariate.py | 4 ++-- pymc/tests/distributions/util.py | 6 +++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pymc/tests/distributions/test_discrete.py b/pymc/tests/distributions/test_discrete.py index 70ea64eb5c..a885170ba2 100644 --- a/pymc/tests/distributions/test_discrete.py +++ b/pymc/tests/distributions/test_discrete.py @@ -891,7 +891,7 @@ class TestLogitBinomial(BaseTestDistributionRandom): pymc_dist = pm.Binomial pymc_dist_params = {"n": 100, "logit_p": 0.5} expected_rv_op_params = {"n": 100, "p": sp.expit(0.5)} - tests_to_run = ["check_pymc_params_match_rv_op"] + checks_to_run = ["check_pymc_params_match_rv_op"] @pytest.mark.parametrize( "n, p, logit_p, expected", @@ -968,7 +968,10 @@ class TestLogitCategorical(BaseTestDistributionRandom): expected_rv_op_params = { "p": sp.softmax(np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]]), axis=-1) } - tests_to_run = [ + sizes_to_check = [None, (), (2,), (4, 2), (1, 2)] + sizes_expected = [(2,), (2,), (2,), (4, 2), (1, 2)] + + checks_to_run = [ "check_pymc_params_match_rv_op", "check_rv_size", ] diff --git a/pymc/tests/distributions/test_multivariate.py b/pymc/tests/distributions/test_multivariate.py index df562d00da..5cd734bdf0 100644 --- a/pymc/tests/distributions/test_multivariate.py +++ b/pymc/tests/distributions/test_multivariate.py @@ -1758,7 +1758,7 @@ class TestLKJCorr(BaseTestDistributionRandom): (2, 4, 2, 3), ] - tests_to_run = [ + checks_to_run = [ "check_pymc_params_match_rv_op", "check_rv_size", "check_draws_match_expected", @@ -1798,7 +1798,7 @@ class TestLKJCholeskyCov(BaseTestDistributionRandom): (2, 4, 2, 6), ] - tests_to_run = [ + checks_to_run = [ "check_rv_size", "check_draws_match_expected", ] diff --git a/pymc/tests/distributions/util.py b/pymc/tests/distributions/util.py index 2f83fdc777..f95ea67475 100644 --- a/pymc/tests/distributions/util.py +++ b/pymc/tests/distributions/util.py @@ -690,7 +690,7 @@ class 
BaseTestDistributionRandom(SeededTest): `check_pymc_draws_match_reference` 3. Shape variable inference is correct, via `check_rv_size` - Each desired test must be referenced by name in `tests_to_run`, when + Each desired test must be referenced by name in `checks_to_run`, when subclassing this distribution. Custom tests can be added to each class as well. See `TestFlat` for an example. @@ -706,7 +706,7 @@ class BaseTestDistributionRandom(SeededTest): ```python sizes_to_check = [None, (1), (2, 3)] sizes_expected = [(3,), (1, 3), (2, 3, 3)] - tests_to_run = ["check_rv_size"] + checks_to_run = ["check_rv_size"] ``` This is usually needed for Multivariate distributions. You can see an @@ -833,7 +833,7 @@ def check_rv_size(self): def validate_tests_list(self): assert len(self.checks_to_run) == len( set(self.checks_to_run) - ), "There are duplicates in the list of tests_to_run" + ), "There are duplicates in the list of checks_to_run" def seeded_scipy_distribution_builder(dist_name: str) -> Callable: From 3316c942e3b973bbedc88f900f0364d32c737b1f Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Wed, 31 Aug 2022 06:48:02 +0200 Subject: [PATCH 03/13] Import EulerMaruyama --- pymc/distributions/__init__.py | 4 +++- pymc/distributions/timeseries.py | 6 +++--- pymc/tests/distributions/test_distribution.py | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index 49a5338302..3b3de17809 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -105,6 +105,7 @@ from pymc.distributions.timeseries import ( AR, GARCH11, + EulerMaruyama, GaussianRandomWalk, MvGaussianRandomWalk, MvStudentTRandomWalk, @@ -171,11 +172,12 @@ "WishartBartlett", "LKJCholeskyCov", "LKJCorr", - "AR", "AsymmetricLaplace", "GaussianRandomWalk", "MvGaussianRandomWalk", "MvStudentTRandomWalk", + "AR", + "EulerMaruyama", "GARCH11", "SkewNormal", "Mixture", diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 9fbc509174..b7583832e1 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -48,12 +48,12 @@ from pymc.util import check_dist_not_registered __all__ = [ - "AR", "GaussianRandomWalk", - "GARCH11", - "EulerMaruyama", "MvGaussianRandomWalk", "MvStudentTRandomWalk", + "AR", + "GARCH11", + "EulerMaruyama", ] diff --git a/pymc/tests/distributions/test_distribution.py b/pymc/tests/distributions/test_distribution.py index 23735656ac..37c8c0c5e7 100644 --- a/pymc/tests/distributions/test_distribution.py +++ b/pymc/tests/distributions/test_distribution.py @@ -111,6 +111,7 @@ def test_all_distributions_have_moments(): dist_module.timeseries.GARCH11, dist_module.timeseries.MvGaussianRandomWalk, dist_module.timeseries.MvStudentTRandomWalk, + dist_module.timeseries.EulerMaruyama, } # Distributions that have been refactored but don't yet have moments From 863034b841715fb33a5b80d6a6d9004452fc0a65 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 5 Sep 2022 07:59:56 +0200 Subject: [PATCH 04/13] Remove support for deprecated Ellipsis in get_steps helper --- pymc/distributions/timeseries.py | 8 +++----- pymc/tests/distributions/test_timeseries.py | 21 ++++----------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index b7583832e1..84ae6b8efa 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -91,14 +91,12 @@ def get_steps( inferred_steps = None if shape 
is not None: shape = to_tuple(shape) - if shape[-1] is not ...: - inferred_steps = shape[-1] - step_shape_offset + inferred_steps = shape[-1] - step_shape_offset if inferred_steps is None and dims is not None: dims = convert_dims(dims) - if dims[-1] is not ...: - model = modelcontext(None) - inferred_steps = model.dim_lengths[dims[-1]] - step_shape_offset + model = modelcontext(None) + inferred_steps = model.dim_lengths[dims[-1]] - step_shape_offset if inferred_steps is None and observed is not None: observed = convert_observed_data(observed) diff --git a/pymc/tests/distributions/test_timeseries.py b/pymc/tests/distributions/test_timeseries.py index 9fd366803a..a19b631709 100644 --- a/pymc/tests/distributions/test_timeseries.py +++ b/pymc/tests/distributions/test_timeseries.py @@ -57,11 +57,9 @@ (None, (10,), 0, 10, True), (None, (10,), 1, 9, True), (None, (10, 5), 0, 5, True), - (None, (10, ...), 0, None, True), (None, None, 0, None, True), (10, (10,), 0, 10, True), (10, (11,), 1, 10, True), - (10, (5, ...), 1, 10, True), (10, (5, 5), 0, 5, False), (10, (5, 10), 1, 9, False), ], @@ -76,8 +74,8 @@ def test_get_steps(info_source, steps, shape, step_shape_offset, expected_steps, dims = None coords = {} else: - dims = tuple(str(i) if shape is not ... else ... for i, shape in enumerate(shape)) - coords = {str(i): range(shape) for i, shape in enumerate(shape) if shape is not ...} + dims = tuple(str(i) for i, shape in enumerate(shape)) + coords = {str(i): range(shape) for i, shape in enumerate(shape)} with Model(coords=coords): inferred_steps = get_steps(steps=steps, dims=dims, step_shape_offset=step_shape_offset) @@ -85,9 +83,6 @@ def test_get_steps(info_source, steps, shape, step_shape_offset, expected_steps, if shape is None: observed = None else: - if ... in shape: - # There is no equivalent to implied dims in observed - return observed = np.zeros(shape) inferred_steps = get_steps( steps=steps, observed=observed, step_shape_offset=step_shape_offset @@ -199,13 +194,6 @@ def test_gaussian_random_walk_init_dist_shape(self, init): ) assert tuple(grw.owner.inputs[-2].shape.eval()) == (3, 2) - def test_shape_ellipsis(self): - grw = pm.GaussianRandomWalk.dist( - mu=0, sigma=1, steps=5, init_dist=pm.Normal.dist(), shape=(3, ...) 
- ) - assert tuple(grw.shape.eval()) == (3, 6) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (3,) - def test_gaussianrandomwalk_broadcasted_by_init_dist(self): grw = pm.GaussianRandomWalk.dist( mu=0, sigma=1, steps=4, init_dist=pm.Normal.dist(size=(2, 3)) @@ -219,10 +207,9 @@ def test_inferred_steps_from_shape(self, shape): steps = x.owner.inputs[-1] assert steps.eval() == 5 - @pytest.mark.parametrize("shape", (None, (5, ...))) - def test_missing_steps(self, shape): + def test_missing_steps(self): with pytest.raises(ValueError, match="Must specify steps or shape parameter"): - GaussianRandomWalk.dist(shape=shape, init_dist=Normal.dist(0, 100)) + GaussianRandomWalk.dist(shape=None, init_dist=Normal.dist(0, 100)) def test_inconsistent_steps_and_shape(self): with pytest.raises(AssertionError, match="Steps do not match last shape dimension"): From 455a518ab9521406dfb7b30c9d083c9f0e5363ec Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 31 Aug 2022 11:18:30 +0200 Subject: [PATCH 05/13] Handle non-RandomVariables in compile_forward_sampling_function --- pymc/sampling.py | 6 +----- pymc/tests/test_sampling.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pymc/sampling.py b/pymc/sampling.py index 0322ed7f2d..20100de25e 100644 --- a/pymc/sampling.py +++ b/pymc/sampling.py @@ -45,7 +45,6 @@ from aesara import tensor as at from aesara.graph.basic import Apply, Constant, Variable, general_toposort, walk from aesara.graph.fg import FunctionGraph -from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.var import ( RandomGeneratorSharedVariable, RandomStateSharedVariable, @@ -1701,10 +1700,7 @@ def compile_forward_sampling_function( and not isinstance(node, (RandomStateSharedVariable, RandomGeneratorSharedVariable)) ) or ( # Basic RVs that are not in the trace - node.owner - and isinstance(node.owner.op, RandomVariable) - and node in basic_rvs - and node not in vars_in_trace + node in basic_rvs and node not in vars_in_trace ) or ( # Variables that have any volatile input node.owner and any(inp in volatile_nodes for inp in node.owner.inputs) diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py index f8ed92dcc5..4fa761b9a7 100644 --- a/pymc/tests/test_sampling.py +++ b/pymc/tests/test_sampling.py @@ -1788,6 +1788,25 @@ def test_lkj_cholesky_cov(self): assert {i.name for i in self.get_function_inputs(f)} == set() assert {i.name for i in self.get_function_roots(f)} == set() + def test_non_random_model_variable(self): + with pm.Model() as model: + # A user may register non-pure RandomVariables that can nevertheless be + # sampled, as long as a custom logprob is dispatched or Aeppl can infer + # its logprob (which is the case for `clip`) + y = at.clip(pm.Normal.dist(), -1, 1) + y = model.register_rv(y, name="y") + y_abs = pm.Deterministic("y_abs", at.abs(y)) + obs = pm.Normal("obs", y_abs, observed=np.zeros(10)) + + # y_abs should be resampled even if in the trace, because the source y is missing + f = compile_forward_sampling_function( + outputs=[obs], + vars_in_trace=[y_abs], + basic_rvs=model.basic_RVs, + ) + assert {i.name for i in self.get_function_inputs(f)} == set() + assert {i.name for i in self.get_function_roots(f)} == set() + def test_get_seeds_per_chain(): ret = _get_seeds_per_chain(None, chains=1) From f95eddfcf2cc023c0f0c3de3086d782fac3a2dee Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 26 Aug 2022 11:17:24 +0200 Subject: [PATCH 06/13] Wrap SymbolicDistribution graphs in SymbolicRandomVariable Ops This 
allows us to treat these variables as if they were pure RandomVariables (dispatch moments, resize, etc.) Co-authored-by: Michael Osthege --- docs/source/api/distributions/utilities.rst | 1 + pymc/aesaraf.py | 10 +- pymc/distributions/__init__.py | 2 + pymc/distributions/censored.py | 34 ++++--- pymc/distributions/distribution.py | 94 ++++++++++++++++--- pymc/distributions/mixture.py | 33 +++---- pymc/distributions/timeseries.py | 22 ++--- pymc/tests/distributions/test_distribution.py | 33 ++++++- pymc/tests/test_aesaraf.py | 10 +- pymc/tests/test_sampling.py | 4 +- 10 files changed, 168 insertions(+), 75 deletions(-) diff --git a/docs/source/api/distributions/utilities.rst b/docs/source/api/distributions/utilities.rst index a6ba09b059..1133bb0cfb 100644 --- a/docs/source/api/distributions/utilities.rst +++ b/docs/source/api/distributions/utilities.rst @@ -12,3 +12,4 @@ Distribution utilities Continuous NoDistribution DensityDist + SymbolicRandomVariable diff --git a/pymc/aesaraf.py b/pymc/aesaraf.py index c8adb276f8..c6936cde1a 100644 --- a/pymc/aesaraf.py +++ b/pymc/aesaraf.py @@ -32,7 +32,6 @@ import pandas as pd import scipy.sparse as sps -from aeppl.abstract import MeasurableVariable from aeppl.logprob import CheckParameterValue from aesara import config, scalar from aesara.compile.mode import Mode, get_mode @@ -987,6 +986,9 @@ def compile_pymc( this function is called within a model context and the model `check_bounds` flag is set to False. """ + # Avoid circular import + from pymc.distributions.distribution import SymbolicRandomVariable + # Create an update mapping of RandomVariable's RNG so that it is automatically # updated after every function call rng_updates = {} @@ -995,7 +997,7 @@ def compile_pymc( var for var in vars_between(inputs, output_to_list) if var.owner - and isinstance(var.owner.op, (RandomVariable, MeasurableVariable)) + and isinstance(var.owner.op, (RandomVariable, SymbolicRandomVariable)) and var not in inputs ): # All nodes in `vars_between(inputs, outputs)` have owners. 
@@ -1008,9 +1010,7 @@ def compile_pymc( else: rng_updates[rng] = rng.default_update else: - update_fn = getattr(random_var.owner.op, "update", None) - if update_fn is not None: - rng_updates.update(update_fn(random_var.owner)) + rng_updates.update(random_var.owner.op.update(random_var.owner)) # We always reseed random variables as this provides RNGs with no chances of collision if rng_updates: diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index 3b3de17809..14eefbb477 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -83,6 +83,7 @@ Distribution, NoDistribution, SymbolicDistribution, + SymbolicRandomVariable, ) from pymc.distributions.mixture import Mixture, NormalMixture from pymc.distributions.multivariate import ( @@ -156,6 +157,7 @@ "DensityDist", "Distribution", "SymbolicDistribution", + "SymbolicRandomVariable", "Continuous", "Discrete", "NoDistribution", diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index c38681d79f..1d6cc9bd09 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -14,15 +14,24 @@ import aesara.tensor as at import numpy as np -from aesara.scalar import Clip from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable from pymc.aesaraf import change_rv_size -from pymc.distributions.distribution import SymbolicDistribution, _moment +from pymc.distributions.distribution import ( + SymbolicDistribution, + SymbolicRandomVariable, + _moment, +) from pymc.util import check_dist_not_registered +class CensoredRV(SymbolicRandomVariable): + """Censored random variable""" + + inline_aeppl = True + + class Censored(SymbolicDistribution): r""" Censored distribution @@ -100,26 +109,23 @@ def rv_op(cls, dist, lower=None, upper=None, size=None): dist = change_rv_size(dist, dist_shape) # Censoring is achieved by clipping the base distribution between lower and upper - rv_out = at.clip(dist, lower, upper) - - # Reference nodes to facilitate identification in other classmethods, without - # worring about possible dimshuffles - rv_out.tag.dist = dist - rv_out.tag.lower = lower - rv_out.tag.upper = upper + dist_, lower_, upper_ = dist.type(), lower.type(), upper.type() + censored_rv_ = at.clip(dist_, lower_, upper_) - return rv_out + return CensoredRV( + inputs=[dist_, lower_, upper_], + outputs=[censored_rv_], + ndim_supp=0, + )(dist, lower, upper) @classmethod def change_size(cls, rv, new_size, expand=False): - dist = rv.tag.dist - lower = rv.tag.lower - upper = rv.tag.upper + dist, lower, upper = rv.owner.inputs new_dist = change_rv_size(dist, new_size, expand=expand) return cls.rv_op(new_dist, lower, upper) -@_moment.register(Clip) +@_moment.register(CensoredRV) def moment_censored(op, rv, dist, lower, upper): moment = at.switch( at.eq(lower, -np.inf), diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index d0b6747662..2bbcdc2601 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -24,12 +24,17 @@ import aesara import numpy as np +from aeppl.abstract import MeasurableVariable, _get_measurable_outputs from aeppl.logprob import _logcdf, _logprob +from aeppl.rewriting import logprob_rewrites_db from aesara import tensor as at -from aesara.graph.basic import Variable +from aesara.compile.builders import OpFromGraph +from aesara.graph import node_rewriter +from aesara.graph.basic import Node, Variable, clone_replace +from aesara.graph.rewriting.basic import 
in2out
 from aesara.tensor.basic import as_tensor_variable
-from aesara.tensor.elemwise import Elemwise
 from aesara.tensor.random.op import RandomVariable
+from aesara.tensor.random.type import RandomType
 from aesara.tensor.var import TensorVariable
 from typing_extensions import TypeAlias
@@ -58,6 +63,7 @@
     "Continuous",
     "Discrete",
     "NoDistribution",
+    "SymbolicRandomVariable",
 ]

 DIST_PARAMETER_TYPES: TypeAlias = Union[np.ndarray, int, float, TensorVariable]
@@ -178,6 +184,50 @@ def _make_rv_and_resize_shape_from_dims(
     return rv_out, resize_shape_from_dims


+class SymbolicRandomVariable(OpFromGraph):
+    """Symbolic Random Variable
+
+    This is a subclass of `OpFromGraph` which is used to encapsulate the symbolic
+    random graph of complex distributions which are built on top of pure
+    `RandomVariable`s.
+
+    These graphs may vary structurally based on the inputs (e.g., their dimensionality),
+    and usually require that random inputs have specific shapes for correct outputs
+    (e.g., avoiding broadcasting of random inputs). Due to this, most distributions that
+    return SymbolicRandomVariable create these graphs at runtime via the
+    classmethod `cls.rv_op`, taking care to clone and resize random inputs, if needed.
+    """
+
+    ndim_supp: int = None
+    """Number of support dimensions as in RandomVariables
+    (0 for scalar, 1 for vector, ...)
+    """
+
+    inline_aeppl: bool = False
+    """Specifies whether the logprob function is derived automatically by introspection
+    of the inner graph.
+
+    If `False`, a logprob function must be dispatched directly to the subclass type.
+    """
+
+    _print_name: Tuple[str, str] = ("Unknown", "\\operatorname{Unknown}")
+    """Tuple of (name, latex name) used for pretty-printing variables of this type"""
+
+    def __init__(self, *args, ndim_supp, **kwargs):
+        self.ndim_supp = ndim_supp
+        kwargs.setdefault("inline", True)
+        super().__init__(*args, **kwargs)
+
+    def update(self, node: Node):
+        """Symbolic update expression for input random state variables
+
+        Returns a dictionary with the symbolic expressions required for correct updating
+        of random state input variables across repeated function evaluations. This is
+        used by `aesaraf.compile_pymc`.
+        """
+        return {}
+
+
 class Distribution(metaclass=DistributionMeta):
     """Statistical distribution"""

@@ -469,7 +519,7 @@ def __new__(
         # Resize variable based on `dims` information
         if resize_shape_from_dims:
             resize_size_from_dims = find_size(
-                shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.tag.ndim_supp
+                shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.owner.op.ndim_supp
             )
             rv_out = cls.change_size(rv=rv_out, new_size=resize_size_from_dims, expand=False)
@@ -547,8 +597,6 @@ def dist(
         ndim_supp = cls.ndim_supp(*dist_params)
         create_size = find_size(shape=shape, size=size, ndim_supp=ndim_supp)
         rv_out = cls.rv_op(*dist_params, size=create_size, **kwargs)
-        # This is needed for resizing from dims in `__new__`
-        rv_out.tag.ndim_supp = ndim_supp

         rv_out.logp = _make_nice_attr_error("rv.logp(x)", "pm.logp(rv, x)")
         rv_out.logcdf = _make_nice_attr_error("rv.logcdf(x)", "pm.logcdf(rv, x)")
@@ -556,6 +604,36 @@ def dist(
         return rv_out


+# Let Aeppl know that the SymbolicRandomVariable has a logprob.
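+# (Registration marks the Op's outputs as measurable, so Aeppl's machinery will
+# look up `_logprob`/`_logcdf` dispatches for them; see also the inlining rewrite
+# registered below for `inline_aeppl=True` subclasses.)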
+MeasurableVariable.register(SymbolicRandomVariable) + + +@_get_measurable_outputs.register(SymbolicRandomVariable) +def _get_measurable_outputs_symbolic_random_variable(op, node): + # This tells Aeppl that any non RandomType outputs are measurable + return [out for out in node.outputs if not isinstance(out.type, RandomType)] + + +@node_rewriter([SymbolicRandomVariable]) +def inline_symbolic_random_variable(fgraph, node): + """ + This optimization expands the internal graph of a SymbolicRV when obtaining logp + from Aeppl, if the flag `inline_aeppl` is True. + """ + op = node.op + if op.inline_aeppl: + return clone_replace(op.inner_outputs, {u: v for u, v in zip(op.inner_inputs, node.inputs)}) + + +# Registered before pre-canonicalization which happens at position=-10 +logprob_rewrites_db.register( + "inline_SymbolicRandomVariable", + in2out(inline_symbolic_random_variable), + "basic", + position=-20, +) + + @singledispatch def _moment(op, rv, *rv_inputs) -> TensorVariable: raise NotImplementedError(f"Variable {rv} of type {op} has no moment implementation.") @@ -571,12 +649,6 @@ def moment(rv: TensorVariable) -> TensorVariable: return _moment(rv.owner.op, rv, *rv.owner.inputs).astype(rv.dtype) -@_moment.register(Elemwise) -def moment_elemwise(op, rv, *dist_params): - """For Elemwise Ops, dispatch on respective scalar_op""" - return _moment(op.scalar_op, rv, *dist_params) - - class Discrete(Distribution): """Base class for discrete distributions""" diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index 3f71f537c5..c0a3df6b61 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -17,10 +17,8 @@ import aesara.tensor as at import numpy as np -from aeppl.abstract import MeasurableVariable, _get_measurable_outputs from aeppl.logprob import _logcdf, _logprob from aeppl.transforms import IntervalTransform -from aesara.compile.builders import OpFromGraph from aesara.graph.basic import Node, equal_computations from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable @@ -29,7 +27,12 @@ from pymc.distributions import transforms from pymc.distributions.continuous import Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters -from pymc.distributions.distribution import SymbolicDistribution, _moment, moment +from pymc.distributions.distribution import ( + SymbolicDistribution, + SymbolicRandomVariable, + _moment, + moment, +) from pymc.distributions.logprob import ignore_logprob, logcdf, logp from pymc.distributions.shape_utils import to_tuple from pymc.distributions.transforms import _default_transform @@ -39,7 +42,7 @@ __all__ = ["Mixture", "NormalMixture"] -class MarginalMixtureRV(OpFromGraph): +class MarginalMixtureRV(SymbolicRandomVariable): """A placeholder used to specify a log-likelihood for a mixture sub-graph.""" default_output = 1 @@ -49,9 +52,6 @@ def update(self, node: Node): return {node.inputs[0]: node.outputs[0]} -MeasurableVariable.register(MarginalMixtureRV) - - class Mixture(SymbolicDistribution): R""" Mixture log-likelihood @@ -293,16 +293,12 @@ def rv_op(cls, weights, *components, size=None): mix_op = MarginalMixtureRV( inputs=[mix_indexes_rng_, weights_, *components_], outputs=[mix_indexes_rng_next_, mix_out_], + ndim_supp=components[0].owner.op.ndim_supp, ) # Create the actual MarginalMixture variable mix_out = mix_op(mix_indexes_rng, weights, *components) - # Reference nodes to facilitate identification in other classmethods - mix_out.tag.weights = weights - 
mix_out.tag.components = components - mix_out.tag.choices_rng = mix_indexes_rng - return mix_out @classmethod @@ -318,14 +314,13 @@ def _resize_components(cls, size, *components): @classmethod def change_size(cls, rv, new_size, expand=False): - weights = rv.tag.weights - components = rv.tag.components + weights, *components = rv.owner.inputs[1:] if expand: - component = rv.tag.components[0] + component = components[0] # Old size is equal to `shape[:-ndim_supp]`, with care needed for `ndim_supp == 0` size_dims = component.ndim - component.owner.op.ndim_supp - if len(rv.tag.components) == 1: + if len(components) == 1: # If we have a single component, new size should ignore the mixture axis # dimension, as that is not touched by `_resize_components` size_dims -= 1 @@ -337,12 +332,6 @@ def change_size(cls, rv, new_size, expand=False): return cls.rv_op(weights, *components, size=None) -@_get_measurable_outputs.register(MarginalMixtureRV) -def _get_measurable_outputs_MarginalMixtureRV(op, node): - # This tells Aeppl that the second output is the measurable one - return [node.outputs[1]] - - @_logprob.register(MarginalMixtureRV) def marginal_mixture_logprob(op, values, rng, weights, *components, **kwargs): (value,) = values diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 84ae6b8efa..a0c5a740a4 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -19,10 +19,8 @@ import aesara.tensor as at import numpy as np -from aeppl.abstract import MeasurableVariable, _get_measurable_outputs from aeppl.logprob import _logprob from aesara import scan -from aesara.compile.builders import OpFromGraph from aesara.graph import FunctionGraph, rewrite_graph from aesara.graph.basic import Node from aesara.raise_op import Assert @@ -35,7 +33,12 @@ from pymc.distributions import distribution, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters -from pymc.distributions.distribution import SymbolicDistribution, _moment, moment +from pymc.distributions.distribution import ( + SymbolicDistribution, + SymbolicRandomVariable, + _moment, + moment, +) from pymc.distributions.logprob import ignore_logprob, logp from pymc.distributions.shape_utils import ( Dims, @@ -324,7 +327,7 @@ def logp( ) -class AutoRegressiveRV(OpFromGraph): +class AutoRegressiveRV(SymbolicRandomVariable): """A placeholder used to specify a log-likelihood for an AR sub-graph.""" default_output = 1 @@ -569,7 +572,7 @@ def step(*args): outputs=[noise_next_rng, ar_], ar_order=ar_order, constant_term=constant_term, - inline=True, + ndim_supp=1, ) ar = ar_op(rhos, sigma, init_dist, steps) @@ -591,15 +594,6 @@ def change_size(cls, rv, new_size, expand=False): ) -MeasurableVariable.register(AutoRegressiveRV) - - -@_get_measurable_outputs.register(AutoRegressiveRV) -def _get_measurable_outputs_ar(op, node): - # This tells Aeppl that the second output is the measurable one - return [node.outputs[1]] - - @_logprob.register(AutoRegressiveRV) def ar_logp(op, values, rhos, sigma, init_dist, steps, noise_rng, **kwargs): (value,) = values diff --git a/pymc/tests/distributions/test_distribution.py b/pymc/tests/distributions/test_distribution.py index 37c8c0c5e7..460df4e912 100644 --- a/pymc/tests/distributions/test_distribution.py +++ b/pymc/tests/distributions/test_distribution.py @@ -20,12 +20,13 @@ import numpy.testing as npt import pytest +from aeppl.abstract import get_measurable_outputs from aesara.tensor 
import TensorVariable

 import pymc as pm

-from pymc.distributions import MvNormal, MvStudentT, joint_logp, logp
-from pymc.distributions.distribution import _moment, moment
+from pymc.distributions import DiracDelta, Flat, MvNormal, MvStudentT, joint_logp, logp
+from pymc.distributions.distribution import SymbolicRandomVariable, _moment, moment
 from pymc.distributions.shape_utils import to_tuple
 from pymc.tests.distributions.util import assert_moment_is_expected
@@ -303,3 +304,31 @@ def _random(mu, rng=None, size=None):
         match="Cannot safely infer the size of a multivariate random variable's moment.",
     ):
         evaled_moment = moment(a).eval({mu: mu_val})
+
+
+class TestSymbolicRandomVariable:
+    def test_inline(self):
+        class TestSymbolicRV(SymbolicRandomVariable):
+            pass
+
+        x = TestSymbolicRV([], [Flat.dist()], ndim_supp=0)()
+
+        # By default, the SymbolicRandomVariable will not be inlined. Because we did
+        # not dispatch a custom logprob function, the logp call below will raise
+        with pytest.raises(NotImplementedError):
+            logp(x, 0)
+
+        class TestInlinedSymbolicRV(SymbolicRandomVariable):
+            inline_aeppl = True
+
+        x_inline = TestInlinedSymbolicRV([], [Flat.dist()], ndim_supp=0)()
+        assert np.isclose(logp(x_inline, 0).eval(), 0)
+
+    def test_measurable_outputs(self):
+        class TestSymbolicRV(SymbolicRandomVariable):
+            pass
+
+        next_rng_, dirac_delta_ = DiracDelta.dist(5).owner.outputs
+        next_rng, dirac_delta = TestSymbolicRV([], [next_rng_, dirac_delta_], ndim_supp=0)()
+        node = dirac_delta.owner
+        assert get_measurable_outputs(node.op, node) == [dirac_delta]
diff --git a/pymc/tests/test_aesaraf.py b/pymc/tests/test_aesaraf.py
index e3052b7458..d3e6f39d96 100644
--- a/pymc/tests/test_aesaraf.py
+++ b/pymc/tests/test_aesaraf.py
@@ -22,7 +22,6 @@
 import pytest
 import scipy.sparse as sps

-from aeppl.abstract import MeasurableVariable
 from aeppl.logprob import ParameterValueError
 from aesara.compile.builders import OpFromGraph
 from aesara.graph.basic import Constant, Variable, ancestors, equal_computations
@@ -44,6 +43,7 @@
     walk_model,
 )
 from pymc.distributions.dist_math import check_parameters
+from pymc.distributions.distribution import SymbolicRandomVariable
 from pymc.exceptions import ShapeError
 from pymc.vartypes import int_types
@@ -528,20 +528,20 @@ def test_compile_pymc_updates_inputs(self):
     def test_compile_pymc_custom_update_op(self, _):
         """Test that custom MeasurableVariable Op updates are used by compile_pymc"""

-        class UnmeasurableOp(OpFromGraph):
+        class NonSymbolicRV(OpFromGraph):
             def update(self, node):
                 return {node.inputs[0]: node.inputs[0] + 1}

         dummy_inputs = [at.scalar(), at.scalar()]
         dummy_outputs = [at.add(*dummy_inputs)]
-        dummy_x = UnmeasurableOp(dummy_inputs, dummy_outputs)(aesara.shared(1.0), 1.0)
+        dummy_x = NonSymbolicRV(dummy_inputs, dummy_outputs)(aesara.shared(1.0), 1.0)

         # Check that there are no updates at first
         fn = compile_pymc(inputs=[], outputs=dummy_x)
         assert fn() == fn() == 2.0

-        # And they are enabled once the Op is registered as Measurable
-        MeasurableVariable.register(UnmeasurableOp)
+        # And they are enabled once the Op is registered as a SymbolicRV
+        SymbolicRandomVariable.register(NonSymbolicRV)
         fn = compile_pymc(inputs=[], outputs=dummy_x)
         assert fn() == 2.0
         assert fn() == 3.0
diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py
index 4fa761b9a7..89b3de7269 100644
--- a/pymc/tests/test_sampling.py
+++ b/pymc/tests/test_sampling.py
@@ -1697,7 +1697,7 @@ def test_volatile_parameters(self):
         assert {i.name for i in self.get_function_inputs(f)} == {"sigma"}
assert {i.name for i in self.get_function_roots(f)} == {"mu", "sigma"} - def test_distributions_op_from_graph(self): + def test_mixture(self): with pm.Model() as model: w = pm.Dirichlet("w", a=np.ones(3), size=(5, 3)) @@ -1731,7 +1731,7 @@ def test_distributions_op_from_graph(self): assert {i.name for i in self.get_function_inputs(f)} == {"mu"} assert {i.name for i in self.get_function_roots(f)} == {"mu"} - def test_distributions_no_op_from_graph(self): + def test_censored(self): with pm.Model() as model: latent_mu = pm.Normal("latent_mu", mu=np.arange(3), sigma=1) mu = pm.Censored("mu", pm.Normal.dist(mu=latent_mu, sigma=1), lower=-1, upper=1) From 2e3247d61efa8cb2ef7365eb4427e5f9b90e3962 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 26 Aug 2022 11:32:46 +0200 Subject: [PATCH 07/13] Refactor change_rv_size into a dispatch-based method * change_rv_size is now called change_dist_size --- docs/source/api/shape_utils.rst | 1 + pymc/aesaraf.py | 65 +--------- pymc/distributions/censored.py | 16 +-- pymc/distributions/distribution.py | 10 +- pymc/distributions/mixture.py | 40 +++---- pymc/distributions/multivariate.py | 7 +- pymc/distributions/shape_utils.py | 120 ++++++++++++++++++- pymc/distributions/timeseries.py | 36 +++--- pymc/tests/distributions/test_censored.py | 8 +- pymc/tests/distributions/test_mixture.py | 10 +- pymc/tests/distributions/test_shape_utils.py | 119 +++++++++++++++++- pymc/tests/distributions/test_timeseries.py | 10 ++ pymc/tests/distributions/util.py | 8 +- pymc/tests/test_aesaraf.py | 73 +---------- 14 files changed, 319 insertions(+), 204 deletions(-) diff --git a/docs/source/api/shape_utils.rst b/docs/source/api/shape_utils.rst index 29485dd4fb..97a2280012 100644 --- a/docs/source/api/shape_utils.rst +++ b/docs/source/api/shape_utils.rst @@ -20,3 +20,4 @@ This module introduces functions that are made aware of the requested `size_tupl broadcast_distribution_samples broadcast_dist_samples_to rv_size_is_none + change_dist_size diff --git a/pymc/aesaraf.py b/pymc/aesaraf.py index c6936cde1a..235c736596 100644 --- a/pymc/aesaraf.py +++ b/pymc/aesaraf.py @@ -33,7 +33,7 @@ import scipy.sparse as sps from aeppl.logprob import CheckParameterValue -from aesara import config, scalar +from aesara import scalar from aesara.compile.mode import Mode, get_mode from aesara.gradient import grad from aesara.graph import node_rewriter @@ -47,7 +47,7 @@ walk, ) from aesara.graph.fg import FunctionGraph -from aesara.graph.op import Op, compute_test_value +from aesara.graph.op import Op from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream from aesara.scalar.basic import Cast from aesara.tensor.basic import _as_tensor_variable @@ -57,12 +57,10 @@ RandomGeneratorSharedVariable, RandomStateSharedVariable, ) -from aesara.tensor.shape import SpecifyShape from aesara.tensor.sharedvar import SharedVariable from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from aesara.tensor.var import TensorConstant, TensorVariable -from pymc.exceptions import ShapeError from pymc.vartypes import continuous_types, isgenerator, typefilter PotentialShapeType = Union[int, np.ndarray, Sequence[Union[int, Variable]], TensorVariable] @@ -149,65 +147,6 @@ def dataframe_to_tensor_variable(df: pd.DataFrame, *args, **kwargs) -> TensorVar return at.as_tensor_variable(df.to_numpy(), *args, **kwargs) -def change_rv_size( - rv: TensorVariable, - new_size: PotentialShapeType, - expand: Optional[bool] = False, -) -> TensorVariable: - """Change or expand the size of a 
`RandomVariable`. - - Parameters - ========== - rv - The old `RandomVariable` output. - new_size - The new size. - expand: - Expand the existing size by `new_size`. - - """ - # Check the dimensionality of the `new_size` kwarg - new_size_ndim = np.ndim(new_size) - if new_size_ndim > 1: - raise ShapeError("The `new_size` must be ≤1-dimensional.", actual=new_size_ndim) - elif new_size_ndim == 0: - new_size = (new_size,) - - # Extract the RV node that is to be resized, together with its inputs, name and tag - assert rv.owner.op is not None - if isinstance(rv.owner.op, SpecifyShape): - rv = rv.owner.inputs[0] - rv_node = rv.owner - rng, size, dtype, *dist_params = rv_node.inputs - name = rv.name - tag = rv.tag - - if expand: - shape = tuple(rv_node.op._infer_shape(size, dist_params)) - size = shape[: len(shape) - rv_node.op.ndim_supp] - new_size = tuple(new_size) + tuple(size) - - # Make sure the new size is a tensor. This dtype-aware conversion helps - # to not unnecessarily pick up a `Cast` in some cases (see #4652). - new_size = at.as_tensor(new_size, ndim=1, dtype="int64") - - new_rv_node = rv_node.op.make_node(rng, new_size, dtype, *dist_params) - new_rv = new_rv_node.outputs[-1] - new_rv.name = name - for k, v in tag.__dict__.items(): - new_rv.tag.__dict__.setdefault(k, v) - - # Update "traditional" rng default_update, if that was set for old RV - default_update = getattr(rng, "default_update", None) - if default_update is not None and default_update is rv_node.outputs[0]: - rng.default_update = new_rv_node.outputs[0] - - if config.compute_test_value != "off": - compute_test_value(new_rv_node) - - return new_rv - - def extract_rv_and_value_vars( var: TensorVariable, ) -> Tuple[TensorVariable, TensorVariable]: diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index 1d6cc9bd09..0e8e399b8f 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -17,12 +17,12 @@ from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable -from pymc.aesaraf import change_rv_size from pymc.distributions.distribution import ( SymbolicDistribution, SymbolicRandomVariable, _moment, ) +from pymc.distributions.shape_utils import _change_dist_size, change_dist_size from pymc.util import check_dist_not_registered @@ -106,7 +106,7 @@ def rv_op(cls, dist, lower=None, upper=None, size=None): # When size is not specified, dist may have to be broadcasted according to lower/upper dist_shape = size if size is not None else at.broadcast_shape(dist, lower, upper) - dist = change_rv_size(dist, dist_shape) + dist = change_dist_size(dist, dist_shape) # Censoring is achieved by clipping the base distribution between lower and upper dist_, lower_, upper_ = dist.type(), lower.type(), upper.type() @@ -118,11 +118,13 @@ def rv_op(cls, dist, lower=None, upper=None, size=None): ndim_supp=0, )(dist, lower, upper) - @classmethod - def change_size(cls, rv, new_size, expand=False): - dist, lower, upper = rv.owner.inputs - new_dist = change_rv_size(dist, new_size, expand=expand) - return cls.rv_op(new_dist, lower, upper) + +@_change_dist_size.register(CensoredRV) +def change_censored_size(cls, dist, new_size, expand=False): + uncensored_dist, lower, upper = dist.owner.inputs + if expand: + new_size = tuple(new_size) + tuple(uncensored_dist.shape) + return Censored.rv_op(uncensored_dist, lower, upper, size=new_size) @_moment.register(CensoredRV) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 2bbcdc2601..1015f89399 
100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -38,13 +38,14 @@ from aesara.tensor.var import TensorVariable from typing_extensions import TypeAlias -from pymc.aesaraf import change_rv_size, convert_observed_data +from pymc.aesaraf import convert_observed_data from pymc.distributions.shape_utils import ( Dims, Shape, Size, StrongDims, StrongShape, + change_dist_size, convert_dims, convert_shape, convert_size, @@ -322,7 +323,7 @@ def __new__( resize_size_from_dims = find_size( shape=resize_shape_from_dims, size=None, ndim_supp=cls.rv_op.ndim_supp ) - rv_out = change_rv_size(rv=rv_out, new_size=resize_size_from_dims, expand=False) + rv_out = change_dist_size(dist=rv_out, new_size=resize_size_from_dims, expand=False) rv_out = model.register_rv( rv_out, @@ -432,9 +433,6 @@ class SymbolicDistribution: cls.rv_op Returns a TensorVariable that represents the symbolic distribution parametrized by a default set of parameters and a size and rngs arguments - cls.change_size - Returns an equivalent symbolic distribution with a different size. This is - analogous to `pymc.aesaraf.change_rv_size` for `RandomVariable`s. """ def __new__( @@ -521,7 +519,7 @@ def __new__( resize_size_from_dims = find_size( shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.owner.op.ndim_supp ) - rv_out = cls.change_size(rv=rv_out, new_size=resize_size_from_dims, expand=False) + rv_out = change_dist_size(rv_out, new_size=resize_size_from_dims, expand=False) rv_out = model.register_rv( rv_out, diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index c0a3df6b61..3caad63c94 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -23,7 +23,6 @@ from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable -from pymc.aesaraf import change_rv_size from pymc.distributions import transforms from pymc.distributions.continuous import Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters @@ -34,7 +33,7 @@ moment, ) from pymc.distributions.logprob import ignore_logprob, logcdf, logp -from pymc.distributions.shape_utils import to_tuple +from pymc.distributions.shape_utils import _change_dist_size, change_dist_size from pymc.distributions.transforms import _default_transform from pymc.util import check_dist_not_registered from pymc.vartypes import continuous_types, discrete_types @@ -310,26 +309,25 @@ def _resize_components(cls, size, *components): mix_size = components[0].shape[mix_axis] size = tuple(size) + (mix_size,) - return [change_rv_size(component, size) for component in components] + return [change_dist_size(component, size) for component in components] - @classmethod - def change_size(cls, rv, new_size, expand=False): - weights, *components = rv.owner.inputs[1:] - - if expand: - component = components[0] - # Old size is equal to `shape[:-ndim_supp]`, with care needed for `ndim_supp == 0` - size_dims = component.ndim - component.owner.op.ndim_supp - if len(components) == 1: - # If we have a single component, new size should ignore the mixture axis - # dimension, as that is not touched by `_resize_components` - size_dims -= 1 - old_size = components[0].shape[:size_dims] - new_size = to_tuple(new_size) + tuple(old_size) - - components = cls._resize_components(new_size, *components) - - return cls.rv_op(weights, *components, size=None) + +@_change_dist_size.register(MarginalMixtureRV) +def change_marginal_mixture_size(op, dist, new_size, expand=False): + weights, *components = 
dist.owner.inputs[1:] + + if expand: + component = components[0] + # Old size is equal to `shape[:-ndim_supp]`, with care needed for `ndim_supp == 0` + size_dims = component.ndim - component.owner.op.ndim_supp + if len(components) == 1: + # If we have a single component, new size should ignore the mixture axis + # dimension, as that is not touched by `_resize_components` + size_dims -= 1 + old_size = components[0].shape[:size_dims] + new_size = tuple(new_size) + tuple(old_size) + + return Mixture.rv_op(weights, *components, size=new_size) @_logprob.register(MarginalMixtureRV) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index fcca5fccfa..82dd7d5281 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -39,7 +39,7 @@ import pymc as pm -from pymc.aesaraf import change_rv_size, floatX, intX +from pymc.aesaraf import floatX, intX from pymc.distributions import transforms from pymc.distributions.continuous import BoundedContinuous, ChiSquared, Normal from pymc.distributions.dist_math import ( @@ -53,6 +53,7 @@ from pymc.distributions.logprob import ignore_logprob from pymc.distributions.shape_utils import ( broadcast_dist_samples_to, + change_dist_size, rv_size_is_none, to_tuple, ) @@ -1122,10 +1123,10 @@ def make_node(self, rng, size, dtype, n, eta, D): # implied batched dimensions for the time being. size = normalize_size_param(size) if D.owner.op.ndim_supp == 0: - D = change_rv_size(D, at.concatenate((size, (n,)))) + D = change_dist_size(D, at.concatenate((size, (n,)))) else: # The support shape must be `n` but we have no way of controlling it - D = change_rv_size(D, size) + D = change_dist_size(D, size) return super().make_node(rng, size, dtype, n, eta, D) diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 21a990c2ac..8c04aed28f 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -17,12 +17,17 @@ A collection of common shape operations needed for broadcasting samples from probability distributions for stochastic nodes in PyMC. """ - +from functools import singledispatch from typing import Optional, Sequence, Tuple, Union import numpy as np +from aesara import config +from aesara import tensor as at from aesara.graph.basic import Variable +from aesara.graph.op import Op, compute_test_value +from aesara.tensor.random.op import RandomVariable +from aesara.tensor.shape import SpecifyShape from aesara.tensor.var import TensorVariable from typing_extensions import TypeAlias @@ -34,8 +39,12 @@ "broadcast_distribution_samples", "broadcast_dist_samples_to", "rv_size_is_none", + "change_dist_size", ] +from pymc.aesaraf import PotentialShapeType +from pymc.exceptions import ShapeError + def to_tuple(shape): """Convert ints, arrays, and Nones to tuples @@ -541,3 +550,112 @@ def find_size( def rv_size_is_none(size: Variable) -> bool: """Check wether an rv size is None (ie., at.Constant([]))""" return size.type.shape == (0,) # type: ignore [attr-defined] + + +@singledispatch +def _change_dist_size(op: Op, dist: TensorVariable, new_size, expand): + raise NotImplementedError( + f"Variable {dist} of type {op} has no _change_dist_size implementation." + ) + + +def change_dist_size( + dist: TensorVariable, + new_size: PotentialShapeType, + expand: bool = False, +) -> TensorVariable: + """Change or expand the size of a Distribution. + + Parameters + ---------- + dist: + The old distribution to be resized. + new_size: + The new size of the distribution. 
+    expand: bool, optional
+        If True, `new_size` is prepended to the existing distribution `size`, so that
+        the final size is equal to (*new_size, *dist.size). Defaults to False.
+
+    Returns
+    -------
+    A new distribution variable that is equivalent to the original distribution with
+    the new size. The new distribution may reuse the same RandomState/Generator inputs
+    as the original distribution.
+
+    Examples
+    --------
+    .. code-block:: python
+
+        x = Normal.dist(shape=(2, 3))
+        new_x = change_dist_size(x, new_size=(5, 3), expand=False)
+        assert new_x.eval().shape == (5, 3)
+
+        new_x = change_dist_size(x, new_size=(5, 3), expand=True)
+        assert new_x.eval().shape == (5, 3, 2, 3)
+
+    """
+    # Check the dimensionality of the `new_size` kwarg
+    new_size_ndim = np.ndim(new_size)  # type: ignore
+    if new_size_ndim > 1:
+        raise ShapeError("The `new_size` must be ≤1-dimensional.", actual=new_size_ndim)
+    elif new_size_ndim == 0:
+        new_size = (new_size,)  # type: ignore
+    else:
+        new_size = tuple(new_size)  # type: ignore
+
+    new_dist = _change_dist_size(dist.owner.op, dist, new_size=new_size, expand=expand)
+
+    new_dist.name = dist.name
+    for k, v in dist.tag.__dict__.items():
+        new_dist.tag.__dict__.setdefault(k, v)
+
+    if config.compute_test_value != "off":
+        compute_test_value(new_dist)
+
+    return new_dist
+
+
+@_change_dist_size.register(RandomVariable)
+def change_rv_size(op, rv, new_size, expand) -> TensorVariable:
+    # Extract the RV node that is to be resized
+    rv_node = rv.owner
+    rng, size, dtype, *dist_params = rv_node.inputs
+
+    if expand:
+        shape = tuple(rv_node.op._infer_shape(size, dist_params))
+        size = shape[: len(shape) - rv_node.op.ndim_supp]
+        new_size = tuple(new_size) + tuple(size)
+
+    # Make sure the new size is a tensor. This dtype-aware conversion helps
+    # to not unnecessarily pick up a `Cast` in some cases (see #4652).
+    new_size = at.as_tensor(new_size, ndim=1, dtype="int64")
+
+    new_rv_node = rv_node.op.make_node(rng, new_size, dtype, *dist_params)
+    new_rv = new_rv_node.outputs[-1]
+
+    # Update "traditional" rng default_update, if that was set for old RV
+    default_update = getattr(rng, "default_update", None)
+    if default_update is not None and default_update is rv_node.outputs[0]:
+        rng.default_update = new_rv_node.outputs[0]
+
+    return new_rv
+
+
+@_change_dist_size.register(SpecifyShape)
+def change_specify_shape_size(op, ss, new_size, expand) -> TensorVariable:
+    inner_var, *shapes = ss.owner.inputs
+    new_var = _change_dist_size(inner_var.owner.op, inner_var, new_size=new_size, expand=expand)
+
+    new_shapes = [None] * new_var.ndim
+    # Old specify_shape is still valid
+    if expand:
+        if len(shapes) > 0:
+            new_shapes[-len(shapes) :] = shapes
+    # Old specify_shape is still valid for support dimensions. We do not reintroduce
+    # checks for resized dimensions, although we could...
+ else: + ndim_supp = new_var.owner.op.ndim_supp + if ndim_supp > 0: + new_shapes[-ndim_supp:] = shapes[-ndim_supp:] + + # specify_shape has a wrong signature https://github.com/aesara-devs/aesara/issues/1164 + return at.specify_shape(new_var, new_shapes) # type: ignore diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index a0c5a740a4..8031211f88 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -29,7 +29,7 @@ from aesara.tensor.random.utils import normalize_size_param from aesara.tensor.rewriting.basic import ShapeFeature, topo_constant_folding -from pymc.aesaraf import change_rv_size, convert_observed_data, floatX, intX +from pymc.aesaraf import convert_observed_data, floatX, intX from pymc.distributions import distribution, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters @@ -43,6 +43,8 @@ from pymc.distributions.shape_utils import ( Dims, Shape, + _change_dist_size, + change_dist_size, convert_dims, rv_size_is_none, to_tuple, @@ -141,7 +143,7 @@ def make_node(self, rng, size, dtype, mu, sigma, init_dist, steps): init_dist_size = ( size if not rv_size_is_none(size) else at.broadcast_shape(mu, sigma, init_dist) ) - init_dist = change_rv_size(init_dist, init_dist_size) + init_dist = change_dist_size(init_dist, init_dist_size) return super().make_node(rng, size, dtype, mu, sigma, init_dist, steps) @@ -530,7 +532,7 @@ def rv_op(cls, rhos, sigma, init_dist, steps, ar_order, constant_term, size=None else: # In this case the support dimension must cover for ar_order init_dist_size = batch_size - init_dist = change_rv_size(init_dist, init_dist_size) + init_dist = change_dist_size(init_dist, init_dist_size) # Create OpFromGraph representing random draws form AR process # Variables with underscore suffix are dummy inputs into the OpFromGraph @@ -578,20 +580,20 @@ def step(*args): ar = ar_op(rhos, sigma, init_dist, steps) return ar - @classmethod - def change_size(cls, rv, new_size, expand=False): - - if expand: - old_size = rv.shape[:-1] - new_size = at.concatenate([new_size, old_size]) - - op = rv.owner.op - return cls.rv_op( - *rv.owner.inputs, - ar_order=op.ar_order, - constant_term=op.constant_term, - size=new_size, - ) + +@_change_dist_size.register(AutoRegressiveRV) +def change_ar_size(op, dist, new_size, expand=False): + + if expand: + old_size = dist.shape[:-1] + new_size = tuple(new_size) + tuple(old_size) + + return AR.rv_op( + *dist.owner.inputs[:-1], + ar_order=op.ar_order, + constant_term=op.constant_term, + size=new_size, + ) @_logprob.register(AutoRegressiveRV) diff --git a/pymc/tests/distributions/test_censored.py b/pymc/tests/distributions/test_censored.py index a62f4eab65..90bac6714e 100644 --- a/pymc/tests/distributions/test_censored.py +++ b/pymc/tests/distributions/test_censored.py @@ -17,6 +17,8 @@ import pymc as pm +from pymc.distributions.shape_utils import change_dist_size + class TestCensored: @pytest.mark.parametrize("censored", (False, True)) @@ -88,13 +90,13 @@ def test_censored_invalid_dist(self): ): x = pm.Censored("x", registered_dist, lower=None, upper=None) - def test_change_size(self): + def test_change_dist_size(self): base_dist = pm.Censored.dist(pm.Normal.dist(), -1, 1, size=(3, 2)) - new_dist = pm.Censored.change_size(base_dist, (4,)) + new_dist = change_dist_size(base_dist, (4,)) assert new_dist.eval().shape == (4,) - new_dist = pm.Censored.change_size(base_dist, (4,), expand=True) + new_dist = 
change_dist_size(base_dist, (4,), expand=True) assert new_dist.eval().shape == (4, 3, 2) def test_dist_broadcasted_by_lower_upper(self): diff --git a/pymc/tests/distributions/test_mixture.py b/pymc/tests/distributions/test_mixture.py index dd162aa487..c4e173a516 100644 --- a/pymc/tests/distributions/test_mixture.py +++ b/pymc/tests/distributions/test_mixture.py @@ -51,7 +51,7 @@ ) from pymc.distributions.logprob import logp from pymc.distributions.mixture import MixtureTransformWarning -from pymc.distributions.shape_utils import to_tuple +from pymc.distributions.shape_utils import change_dist_size, to_tuple from pymc.distributions.transforms import _default_transform from pymc.math import expand_packed_triangular from pymc.model import Model @@ -387,21 +387,21 @@ def test_components_expanded_by_weights(self, comp_dists): ), ) @pytest.mark.parametrize("expand", (False, True)) - def test_change_size(self, comp_dists, expand): + def test_change_dist_size(self, comp_dists, expand): if isinstance(comp_dists, list): univariate = comp_dists[0].owner.op.ndim_supp == 0 else: univariate = comp_dists.owner.op.ndim_supp == 0 mix = Mixture.dist(w=Dirichlet.dist([1, 1]), comp_dists=comp_dists) - mix = Mixture.change_size(mix, new_size=(4,), expand=expand) + mix = change_dist_size(mix, new_size=(4,), expand=expand) draws = mix.eval() expected_shape = (4,) if univariate else (4, 3) assert draws.shape == expected_shape assert np.unique(draws).size == draws.size mix = Mixture.dist(w=Dirichlet.dist([1, 1]), comp_dists=comp_dists, size=(3,)) - mix = Mixture.change_size(mix, new_size=(5, 4), expand=expand) + mix = change_dist_size(mix, new_size=(5, 4), expand=expand) draws = mix.eval() expected_shape = (5, 4) if univariate else (5, 4, 3) if expand: @@ -847,7 +847,6 @@ def ref_rand(size, w, mu, sigma): extra_args={"comp_shape": 2}, size=1000, ref_rand=ref_rand, - change_rv_size_fn=Mixture.change_size, ) pymc_random( NormalMixture, @@ -859,7 +858,6 @@ def ref_rand(size, w, mu, sigma): extra_args={"comp_shape": 3}, size=1000, ref_rand=ref_rand, - change_rv_size_fn=Mixture.change_size, ) diff --git a/pymc/tests/distributions/test_shape_utils.py b/pymc/tests/distributions/test_shape_utils.py index 77c8ea35ba..825a2fa75c 100644 --- a/pymc/tests/distributions/test_shape_utils.py +++ b/pymc/tests/distributions/test_shape_utils.py @@ -11,21 +11,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import re import warnings import aesara import numpy as np import pytest +from aesara import Mode from aesara import tensor as at +from aesara.graph import Constant, ancestors +from aesara.tensor.random import normal +from aesara.tensor.shape import SpecifyShape import pymc as pm +from pymc import ShapeError from pymc.distributions.shape_utils import ( broadcast_dist_samples_shape, broadcast_dist_samples_to, broadcast_distribution_samples, + change_dist_size, convert_dims, convert_shape, convert_size, @@ -496,3 +502,114 @@ def test_rv_size_is_none(): size = pm.Normal.dist(0, 1).size rv = pm.Normal.dist(0, 1, size=size) assert not rv_size_is_none(rv.owner.inputs[1]) + + +def test_change_rv_size(): + loc = at.as_tensor_variable([1, 2]) + rv = normal(loc=loc) + assert rv.ndim == 1 + assert tuple(rv.shape.eval()) == (2,) + + with pytest.raises(ShapeError, match="must be ≤1-dimensional"): + change_dist_size(rv, new_size=[[2, 3]]) + with pytest.raises(ShapeError, match="must be ≤1-dimensional"): + change_dist_size(rv, new_size=at.as_tensor_variable([[2, 3], [4, 5]])) + + rv_new = change_dist_size(rv, new_size=(3,), expand=True) + assert rv_new.ndim == 2 + assert tuple(rv_new.shape.eval()) == (3, 2) + + # Make sure that the shape used to determine the expanded size doesn't + # depend on the old `RandomVariable`. + rv_new_ancestors = set(ancestors((rv_new,))) + assert loc in rv_new_ancestors + assert rv not in rv_new_ancestors + + rv_newer = change_dist_size(rv_new, new_size=(4,), expand=True) + assert rv_newer.ndim == 3 + assert tuple(rv_newer.shape.eval()) == (4, 3, 2) + + # Make sure we avoid introducing a `Cast` by converting the new size before + # constructing the new `RandomVariable` + rv = normal(0, 1) + new_size = np.array([4, 3], dtype="int32") + rv_newer = change_dist_size(rv, new_size=new_size, expand=False) + assert rv_newer.ndim == 2 + assert isinstance(rv_newer.owner.inputs[1], Constant) + assert tuple(rv_newer.shape.eval()) == (4, 3) + + rv = normal(0, 1) + new_size = at.as_tensor(np.array([4, 3], dtype="int32")) + rv_newer = change_dist_size(rv, new_size=new_size, expand=True) + assert rv_newer.ndim == 2 + assert tuple(rv_newer.shape.eval()) == (4, 3) + + rv = normal(0, 1) + new_size = at.as_tensor(2, dtype="int32") + rv_newer = change_dist_size(rv, new_size=new_size, expand=True) + assert rv_newer.ndim == 1 + assert tuple(rv_newer.shape.eval()) == (2,) + + +def test_change_rv_size_default_update(): + rng = aesara.shared(np.random.default_rng(0)) + x = normal(rng=rng) + + # Test that "traditional" default_update is updated + rng.default_update = x.owner.outputs[0] + new_x = change_dist_size(x, new_size=(2,)) + assert rng.default_update is not x.owner.outputs[0] + assert rng.default_update is new_x.owner.outputs[0] + + # Test that "non-traditional" default_update is left unchanged + next_rng = aesara.shared(np.random.default_rng(1)) + rng.default_update = next_rng + new_x = change_dist_size(x, new_size=(2,)) + assert rng.default_update is next_rng + + # Test that default_update is not set if there was none before + del rng.default_update + new_x = change_dist_size(x, new_size=(2,)) + assert not hasattr(rng, "default_update") + + +def test_change_specify_shape_size_univariate(): + with aesara.config.change_flags(mode=Mode("py")): + s1, s2 = at.iscalars("s1", "s2") + x = at.random.normal(size=(s1, s2)) + x = at.specify_shape(x, (5, 3)) + x.eval({s1: 5, s2: 3}).shape == (5, 3) + + new_x = change_dist_size(x, (10, 5)) + # SpecifyShape is no longer present + assert not 
isinstance(new_x.owner.op, SpecifyShape)
+        assert new_x.eval().shape == (10, 5)
+
+        new_x = change_dist_size(x, (10, 5), expand=True)
+        # SpecifyShape is still present
+        assert isinstance(new_x.owner.op, SpecifyShape)
+        assert new_x.eval({s1: 5, s2: 3}).shape == (10, 5, 5, 3)
+        with pytest.raises(AssertionError, match=re.escape("expected (None, None, 5, 3)")):
+            new_x.eval({s1: 6, s2: 3})
+
+
+def test_change_specify_shape_size_multivariate():
+    with aesara.config.change_flags(mode=Mode("py")):
+        batch, supp = at.iscalars("batch", "supp")
+        x = at.random.multivariate_normal(at.zeros(supp), at.eye(supp), size=(batch,))
+        x = at.specify_shape(x, (5, 3))
+        assert x.eval({batch: 5, supp: 3}).shape == (5, 3)
+
+        new_x = change_dist_size(x, (10, 5))
+        # SpecifyShape is still present in the support dimension
+        assert isinstance(new_x.owner.op, SpecifyShape)
+        assert new_x.eval({supp: 3}).shape == (10, 5, 3)
+        with pytest.raises(AssertionError, match=re.escape("expected (None, None, 3)")):
+            new_x.eval({supp: 4})
+
+        new_x = change_dist_size(x, (10, 5), expand=True)
+        # SpecifyShape is still present in both support and batch dimension
+        assert isinstance(new_x.owner.op, SpecifyShape)
+        assert new_x.eval({batch: 5, supp: 3}).shape == (10, 5, 5, 3)
+        with pytest.raises(AssertionError, match=re.escape("expected (None, None, 5, 3)")):
+            new_x.eval({batch: 6, supp: 3})
diff --git a/pymc/tests/distributions/test_timeseries.py b/pymc/tests/distributions/test_timeseries.py
index a19b631709..7cafdef191 100644
--- a/pymc/tests/distributions/test_timeseries.py
+++ b/pymc/tests/distributions/test_timeseries.py
@@ -28,6 +28,7 @@
 from pymc.distributions.discrete import DiracDelta
 from pymc.distributions.logprob import logp
 from pymc.distributions.multivariate import Dirichlet
+from pymc.distributions.shape_utils import change_dist_size
 from pymc.distributions.timeseries import (
     AR,
     GARCH11,
@@ -503,6 +504,15 @@ def test_init_deprecated_arg(self):
         with pytest.warns(FutureWarning, match="init parameter is now called init_dist"):
             pm.AR.dist(rho=[1, 2, 3], init=Normal.dist(), shape=(10,))
 
+    def test_change_dist_size(self):
+        base_dist = pm.AR.dist(rho=[0.5, 0.5], init_dist=pm.Normal.dist(size=(2,)), shape=(3, 10))
+
+        new_dist = change_dist_size(base_dist, (4,))
+        assert new_dist.eval().shape == (4, 10)
+
+        new_dist = change_dist_size(base_dist, (4,), expand=True)
+        assert new_dist.eval().shape == (4, 3, 10)
+
 
 @pytest.mark.xfail(reason="Timeseries not refactored", raises=NotImplementedError)
 def test_GARCH11():
diff --git a/pymc/tests/distributions/util.py b/pymc/tests/distributions/util.py
index f95ea67475..3f65350694 100644
--- a/pymc/tests/distributions/util.py
+++ b/pymc/tests/distributions/util.py
@@ -18,9 +18,10 @@
 
 import pymc as pm
 
-from pymc.aesaraf import change_rv_size, compile_pymc, floatX, intX
+from pymc.aesaraf import compile_pymc, floatX, intX
 from pymc.distributions import logcdf, logp
 from pymc.distributions.logprob import joint_logp
+from pymc.distributions.shape_utils import change_dist_size
 from pymc.initial_point import make_initial_point_fn
 from pymc.tests.helpers import SeededTest, select_by_precision
 
@@ -595,7 +596,6 @@ def pymc_random(
     fails=10,
     extra_args=None,
     model_args=None,
-    change_rv_size_fn=change_rv_size,
 ):
     if valuedomain is None:
         valuedomain = Domain([0], edges=(None, None))
@@ -604,7 +604,7 @@ def pymc_random(
         model_args = {}
 
     model, param_vars = build_model(dist, valuedomain, paramdomains, extra_args)
-    model_dist = change_rv_size_fn(model.named_vars["value"], size, 
expand=True) + model_dist = change_dist_size(model.named_vars["value"], size, expand=True) pymc_rand = compile_pymc([], model_dist) domains = paramdomains.copy() @@ -643,7 +643,7 @@ def pymc_random_discrete( valuedomain = Domain([0], edges=(None, None)) model, param_vars = build_model(dist, valuedomain, paramdomains) - model_dist = change_rv_size(model.named_vars["value"], size, expand=True) + model_dist = change_dist_size(model.named_vars["value"], size, expand=True) pymc_rand = compile_pymc([], model_dist) domains = paramdomains.copy() diff --git a/pymc/tests/test_aesaraf.py b/pymc/tests/test_aesaraf.py index d3e6f39d96..681f0a1970 100644 --- a/pymc/tests/test_aesaraf.py +++ b/pymc/tests/test_aesaraf.py @@ -24,7 +24,7 @@ from aeppl.logprob import ParameterValueError from aesara.compile.builders import OpFromGraph -from aesara.graph.basic import Constant, Variable, ancestors, equal_computations +from aesara.graph.basic import Variable, equal_computations from aesara.tensor.random.basic import normal, uniform from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.var import RandomStateSharedVariable @@ -34,7 +34,6 @@ import pymc as pm from pymc.aesaraf import ( - change_rv_size, compile_pymc, convert_observed_data, extract_obs_data, @@ -44,7 +43,6 @@ ) from pymc.distributions.dist_math import check_parameters from pymc.distributions.distribution import SymbolicRandomVariable -from pymc.exceptions import ShapeError from pymc.vartypes import int_types @@ -82,75 +80,6 @@ def test_pd_as_tensor_variable_multiindex() -> None: np.testing.assert_array_equal(x=at.as_tensor_variable(x=df).eval(), y=np_array) -def test_change_rv_size(): - loc = at.as_tensor_variable([1, 2]) - rv = normal(loc=loc) - assert rv.ndim == 1 - assert tuple(rv.shape.eval()) == (2,) - - with pytest.raises(ShapeError, match="must be ≤1-dimensional"): - change_rv_size(rv, new_size=[[2, 3]]) - with pytest.raises(ShapeError, match="must be ≤1-dimensional"): - change_rv_size(rv, new_size=at.as_tensor_variable([[2, 3], [4, 5]])) - - rv_new = change_rv_size(rv, new_size=(3,), expand=True) - assert rv_new.ndim == 2 - assert tuple(rv_new.shape.eval()) == (3, 2) - - # Make sure that the shape used to determine the expanded size doesn't - # depend on the old `RandomVariable`. 
- rv_new_ancestors = set(ancestors((rv_new,))) - assert loc in rv_new_ancestors - assert rv not in rv_new_ancestors - - rv_newer = change_rv_size(rv_new, new_size=(4,), expand=True) - assert rv_newer.ndim == 3 - assert tuple(rv_newer.shape.eval()) == (4, 3, 2) - - # Make sure we avoid introducing a `Cast` by converting the new size before - # constructing the new `RandomVariable` - rv = normal(0, 1) - new_size = np.array([4, 3], dtype="int32") - rv_newer = change_rv_size(rv, new_size=new_size, expand=False) - assert rv_newer.ndim == 2 - assert isinstance(rv_newer.owner.inputs[1], Constant) - assert tuple(rv_newer.shape.eval()) == (4, 3) - - rv = normal(0, 1) - new_size = at.as_tensor(np.array([4, 3], dtype="int32")) - rv_newer = change_rv_size(rv, new_size=new_size, expand=True) - assert rv_newer.ndim == 2 - assert tuple(rv_newer.shape.eval()) == (4, 3) - - rv = normal(0, 1) - new_size = at.as_tensor(2, dtype="int32") - rv_newer = change_rv_size(rv, new_size=new_size, expand=True) - assert rv_newer.ndim == 1 - assert tuple(rv_newer.shape.eval()) == (2,) - - -def test_change_rv_size_default_update(): - rng = aesara.shared(np.random.default_rng(0)) - x = normal(rng=rng) - - # Test that "traditional" default_update is updated - rng.default_update = x.owner.outputs[0] - new_x = change_rv_size(x, new_size=(2,)) - assert rng.default_update is not x.owner.outputs[0] - assert rng.default_update is new_x.owner.outputs[0] - - # Test that "non-traditional" default_update is left unchanged - next_rng = aesara.shared(np.random.default_rng(1)) - rng.default_update = next_rng - new_x = change_rv_size(x, new_size=(2,)) - assert rng.default_update is next_rng - - # Test that default_update is not set if there was none before - del rng.default_update - new_x = change_rv_size(x, new_size=(2,)) - assert not hasattr(rng, "default_update") - - class TestBroadcasting: def test_make_shared_replacements(self): """Check if pm.make_shared_replacements preserves broadcasting.""" From b4ce5ae07b4e4f106cd83e82f20f313e50cc5118 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 5 Sep 2022 08:11:13 +0200 Subject: [PATCH 08/13] Deprecate str_for_symbolic_dist in favor of str_for_dist * Display the dists and their inputs used by SymbolicDistributions --- pymc/distributions/censored.py | 1 + pymc/distributions/distribution.py | 6 +-- pymc/distributions/mixture.py | 1 + pymc/distributions/timeseries.py | 1 + pymc/printing.py | 78 +++++++++++++++++++++--------- pymc/tests/test_printing.py | 8 +-- 6 files changed, 64 insertions(+), 31 deletions(-) diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index 0e8e399b8f..ef78d33449 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -30,6 +30,7 @@ class CensoredRV(SymbolicRandomVariable): """Censored random variable""" inline_aeppl = True + _print_name = ("Censored", "\\operatorname{Censored}") class Censored(SymbolicDistribution): diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 1015f89399..cb3489513b 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -52,7 +52,7 @@ find_size, shape_from_dims, ) -from pymc.printing import str_for_dist, str_for_symbolic_dist +from pymc.printing import str_for_dist from pymc.util import UNSET from pymc.vartypes import string_types @@ -531,9 +531,9 @@ def __new__( initval=initval, ) # add in pretty-printing support - rv_out.str_repr = types.MethodType(str_for_symbolic_dist, rv_out) + rv_out.str_repr = 
types.MethodType(str_for_dist, rv_out) rv_out._repr_latex_ = types.MethodType( - functools.partial(str_for_symbolic_dist, formatting="latex"), rv_out + functools.partial(str_for_dist, formatting="latex"), rv_out ) rv_out.logp = _make_nice_attr_error("rv.logp(x)", "pm.logp(rv, x)") diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index 3caad63c94..5fddb54c92 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -45,6 +45,7 @@ class MarginalMixtureRV(SymbolicRandomVariable): """A placeholder used to specify a log-likelihood for a mixture sub-graph.""" default_output = 1 + _print_name = ("MarginalMixture", "\\operatorname{MarginalMixture}") def update(self, node: Node): # Update for the internal mix_indexes RV diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 8031211f88..9ac2db3e40 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -335,6 +335,7 @@ class AutoRegressiveRV(SymbolicRandomVariable): default_output = 1 ar_order: int constant_term: bool + _print_name = ("AR", "\\operatorname{AR}") def __init__(self, *args, ar_order, constant_term, **kwargs): self.ar_order = ar_order diff --git a/pymc/printing.py b/pymc/printing.py index 87c43924ec..3018e6ba53 100644 --- a/pymc/printing.py +++ b/pymc/printing.py @@ -20,6 +20,10 @@ from aesara.tensor.basic import TensorVariable, Variable from aesara.tensor.elemwise import DimShuffle from aesara.tensor.random.basic import RandomVariable +from aesara.tensor.random.var import ( + RandomGeneratorSharedVariable, + RandomStateSharedVariable, +) from aesara.tensor.var import TensorConstant from pymc.model import Model @@ -31,40 +35,62 @@ ] -def str_for_dist(rv: TensorVariable, formatting: str = "plain", include_params: bool = True) -> str: - """Make a human-readable string representation of a RandomVariable in a model, either +def str_for_dist( + dist: TensorVariable, formatting: str = "plain", include_params: bool = True +) -> str: + """Make a human-readable string representation of a Distribution in a model, either LaTeX or plain, optionally with distribution parameter values included.""" if include_params: # first 3 args are always (rng, size, dtype), rest is relevant for distribution - dist_args = [_str_for_input_var(x, formatting=formatting) for x in rv.owner.inputs[3:]] + if isinstance(dist.owner.op, RandomVariable): + dist_args = [ + _str_for_input_var(x, formatting=formatting) for x in dist.owner.inputs[3:] + ] + else: + dist_args = [ + _str_for_input_var(x, formatting=formatting) + for x in dist.owner.inputs + if not isinstance(x, (RandomStateSharedVariable, RandomGeneratorSharedVariable)) + ] + + print_name = dist.name - print_name = rv.name if rv.name is not None else "" if "latex" in formatting: - print_name = r"\text{" + _latex_escape(print_name) + "}" - dist_name = rv.owner.op._print_name[1] + if print_name is not None: + print_name = r"\text{" + _latex_escape(dist.name) + "}" + + op_name = ( + dist.owner.op._print_name[1] + if hasattr(dist.owner.op, "_print_name") + else r"\\operatorname{Unknown}" + ) if include_params: - return r"${} \sim {}({})$".format(print_name, dist_name, ",~".join(dist_args)) + if print_name: + return r"${} \sim {}({})$".format(print_name, op_name, ",~".join(dist_args)) + else: + return r"${}({})$".format(op_name, ",~".join(dist_args)) + else: - return rf"${print_name} \sim {dist_name}$" + if print_name: + return rf"${print_name} \sim {op_name}$" + else: + return rf"${op_name}$" + else: # 
plain - dist_name = rv.owner.op._print_name[0] + dist_name = ( + dist.owner.op._print_name[0] if hasattr(dist.owner.op, "_print_name") else "Unknown" + ) if include_params: - return r"{} ~ {}({})".format(print_name, dist_name, ", ".join(dist_args)) + if print_name: + return r"{} ~ {}({})".format(print_name, dist_name, ", ".join(dist_args)) + else: + return r"{}({})".format(dist_name, ", ".join(dist_args)) else: - return rf"{print_name} ~ {dist_name}" - - -def str_for_symbolic_dist( - rv: TensorVariable, formatting: str = "plain", include_params: bool = True -) -> str: - """Make a human-readable string representation of a SymbolicDistribution in a model, - either LaTeX or plain, optionally with distribution parameter values included.""" - - if "latex" in formatting: - return rf"$\text{{{rv.name}}} \sim \text{{{rv.owner.op}}}$" - else: - return rf"{rv.name} ~ {rv.owner.op}" + if print_name: + return rf"{print_name} ~ {dist_name}" + else: + return dist_name def str_for_model(model: Model, formatting: str = "plain", include_params: bool = True) -> str: @@ -145,7 +171,11 @@ def _is_potential_or_determinstic(var: Variable) -> bool: def _str_for_input_rv(var: Variable, formatting: str) -> str: - _str = var.name if var.name is not None else "" + _str = ( + var.name + if var.name is not None + else str_for_dist(var, formatting=formatting, include_params=True) + ) if "latex" in formatting: return r"\text{" + _latex_escape(_str) + "}" else: diff --git a/pymc/tests/test_printing.py b/pymc/tests/test_printing.py index 2d082caa30..6ef08bdf13 100644 --- a/pymc/tests/test_printing.py +++ b/pymc/tests/test_printing.py @@ -92,7 +92,7 @@ def setup_class(self): r"beta ~ N(0, 10)", r"Z ~ N(f(), f())", r"nb_with_p_n ~ NB(10, nbp)", - r"zip ~ MarginalMixtureRV{inline=False}", + r"zip ~ MarginalMixture(f(), DiracDelta(0), Pois(5))", r"Y_obs ~ N(mu, sigma)", r"pot ~ Potential(f(beta, alpha))", ], @@ -103,7 +103,7 @@ def setup_class(self): r"beta ~ N", r"Z ~ N", r"nb_with_p_n ~ NB", - r"zip ~ MarginalMixtureRV{inline=False}", + r"zip ~ MarginalMixture", r"Y_obs ~ N", r"pot ~ Potential", ], @@ -114,7 +114,7 @@ def setup_class(self): r"$\text{beta} \sim \operatorname{N}(0,~10)$", r"$\text{Z} \sim \operatorname{N}(f(),~f())$", r"$\text{nb_with_p_n} \sim \operatorname{NB}(10,~\text{nbp})$", - r"$\text{zip} \sim \text{MarginalMixtureRV{inline=False}}$", + r"$\text{zip} \sim \operatorname{MarginalMixture}(f(),~\text{\$\operatorname{DiracDelta}(0)\$},~\text{\$\operatorname{Pois}(5)\$})$", r"$\text{Y_obs} \sim \operatorname{N}(\text{mu},~\text{sigma})$", r"$\text{pot} \sim \operatorname{Potential}(f(\text{beta},~\text{alpha}))$", ], @@ -125,7 +125,7 @@ def setup_class(self): r"$\text{beta} \sim \operatorname{N}$", r"$\text{Z} \sim \operatorname{N}$", r"$\text{nb_with_p_n} \sim \operatorname{NB}$", - r"$\text{zip} \sim \text{MarginalMixtureRV{inline=False}}$", + r"$\text{zip} \sim \operatorname{MarginalMixture}$", r"$\text{Y_obs} \sim \operatorname{N}$", r"$\text{pot} \sim \operatorname{Potential}$", ], From ef0e919f3afc67318ebe2b08833b4df8c71c3e81 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 26 Aug 2022 19:25:01 +0200 Subject: [PATCH 09/13] Deprecate SymbolicDistribution in favor of Distribution class * Also fixes index error in AR when init_dist is scalar and size is None * Remove unused rv_class attribute --- docs/source/api/distributions/utilities.rst | 1 - pymc/distributions/__init__.py | 2 - pymc/distributions/censored.py | 10 +- pymc/distributions/distribution.py | 210 +----------------- 
pymc/distributions/mixture.py | 11 +- pymc/distributions/timeseries.py | 9 +- pymc/tests/distributions/test_distribution.py | 2 +- 7 files changed, 24 insertions(+), 221 deletions(-) diff --git a/docs/source/api/distributions/utilities.rst b/docs/source/api/distributions/utilities.rst index 1133bb0cfb..bee4250714 100644 --- a/docs/source/api/distributions/utilities.rst +++ b/docs/source/api/distributions/utilities.rst @@ -7,7 +7,6 @@ Distribution utilities :toctree: generated/ Distribution - SymbolicDistribution Discrete Continuous NoDistribution diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index 14eefbb477..0a17fb45e1 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -82,7 +82,6 @@ Discrete, Distribution, NoDistribution, - SymbolicDistribution, SymbolicRandomVariable, ) from pymc.distributions.mixture import Mixture, NormalMixture @@ -156,7 +155,6 @@ "OrderedProbit", "DensityDist", "Distribution", - "SymbolicDistribution", "SymbolicRandomVariable", "Continuous", "Discrete", diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index ef78d33449..4b8a072fce 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -18,7 +18,7 @@ from aesara.tensor.random.op import RandomVariable from pymc.distributions.distribution import ( - SymbolicDistribution, + Distribution, SymbolicRandomVariable, _moment, ) @@ -33,7 +33,7 @@ class CensoredRV(SymbolicRandomVariable): _print_name = ("Censored", "\\operatorname{Censored}") -class Censored(SymbolicDistribution): +class Censored(Distribution): r""" Censored distribution @@ -82,6 +82,8 @@ class Censored(SymbolicDistribution): censored_normal = pm.Censored("censored_normal", normal_dist, lower=-1, upper=1) """ + rv_type = CensoredRV + @classmethod def dist(cls, dist, lower, upper, **kwargs): if not isinstance(dist, TensorVariable) or not isinstance(dist.owner.op, RandomVariable): @@ -95,10 +97,6 @@ def dist(cls, dist, lower, upper, **kwargs): check_dist_not_registered(dist) return super().dist([dist, lower, upper], **kwargs) - @classmethod - def ndim_supp(cls, *dist_params): - return 0 - @classmethod def rv_op(cls, dist, lower=None, upper=None, size=None): diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index cb3489513b..7654c5e58c 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -32,6 +32,7 @@ from aesara.graph import node_rewriter from aesara.graph.basic import Node, Variable, clone_replace from aesara.graph.rewriting.basic import in2out +from aesara.graph.utils import MetaType from aesara.tensor.basic import as_tensor_variable from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.type import RandomType @@ -42,7 +43,6 @@ from pymc.distributions.shape_utils import ( Dims, Shape, - Size, StrongDims, StrongShape, change_dist_size, @@ -60,7 +60,6 @@ "DensityDistRV", "DensityDist", "Distribution", - "SymbolicDistribution", "Continuous", "Discrete", "NoDistribution", @@ -112,6 +111,7 @@ def _random(*args, **kwargs): if isinstance(rv_op, RandomVariable): rv_type = type(rv_op) + clsdict["rv_type"] = rv_type new_cls = super().__new__(cls, name, bases, clsdict) @@ -232,8 +232,8 @@ def update(self, node: Node): class Distribution(metaclass=DistributionMeta): """Statistical distribution""" - rv_class = None - rv_op: RandomVariable = None + rv_op: [RandomVariable, SymbolicRandomVariable] = None + rv_type: MetaType = None def __new__( cls, @@ -321,7 
+321,7 @@ def __new__( # Resize variable based on `dims` information if resize_shape_from_dims: resize_size_from_dims = find_size( - shape=resize_shape_from_dims, size=None, ndim_supp=cls.rv_op.ndim_supp + shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.owner.op.ndim_supp ) rv_out = change_dist_size(dist=rv_out, new_size=resize_size_from_dims, expand=False) @@ -397,202 +397,10 @@ def dist( shape = convert_shape(shape) size = convert_size(size) - create_size = find_size(shape=shape, size=size, ndim_supp=cls.rv_op.ndim_supp) - rv_out = cls.rv_op(*dist_params, size=create_size, **kwargs) - - rv_out.logp = _make_nice_attr_error("rv.logp(x)", "pm.logp(rv, x)") - rv_out.logcdf = _make_nice_attr_error("rv.logcdf(x)", "pm.logcdf(rv, x)") - rv_out.random = _make_nice_attr_error("rv.random()", "pm.draw(rv)") - return rv_out - - -class SymbolicDistribution: - """Symbolic statistical distribution - - While traditional PyMC distributions are represented by a single RandomVariable - graph, Symbolic distributions correspond to a larger graph that contains one or - more RandomVariables and an arbitrary number of deterministic operations, which - represent their own kind of distribution. - - The graphs returned by symbolic distributions can be evaluated directly to - obtain valid draws and can further be parsed by Aeppl to derive the - corresponding logp at runtime. - - Check pymc.distributions.Censored for an example of a symbolic distribution. - - Symbolic distributions must implement the following classmethods: - cls.dist - Performs input validation and converts optional alternative parametrizations - to a canonical parametrization. It should call `super().dist()`, passing a - list with the default parameters as the first and only non keyword argument, - followed by other keyword arguments like size and rngs, and return the result - cls.ndim_supp - Returns the support of the symbolic distribution, given the default set of - parameters. This may not always be constant, for instance if the symbolic - distribution can be defined based on an arbitrary base distribution. - cls.rv_op - Returns a TensorVariable that represents the symbolic distribution - parametrized by a default set of parameters and a size and rngs arguments - """ - - def __new__( - cls, - name: str, - *args, - dims: Optional[Dims] = None, - initval=None, - observed=None, - total_size=None, - transform=UNSET, - **kwargs, - ) -> TensorVariable: - """Adds a TensorVariable corresponding to a PyMC symbolic distribution to the - current model. - - Parameters - ---------- - cls : type - A distribution class that inherits from SymbolicDistribution. - name : str - Name for the new model variable. - dims : tuple, optional - A tuple of dimension names known to the model. When shape is not provided, - the shape of dims is used to define the shape of the variable. - initval : optional - Numeric or symbolic untransformed initial value of matching shape, - or one of the following initial value strategies: "moment", "prior". - Depending on the sampler's settings, a random jitter may be added to numeric, - symbolic or moment-based initial values in the transformed space. - observed : optional - Observed data to be passed when registering the random variable in the model. - When neither shape nor dims is provided, the shape of observed is used to - define the shape of the variable. - See ``Model.register_rv``. - total_size : float, optional - See ``Model.register_rv``. - transform : optional - See ``Model.register_rv``. 
- **kwargs - Keyword arguments that will be forwarded to ``.dist()``. - Most prominently: ``shape`` and ``size`` - - Returns - ------- - var : TensorVariable - The created variable, registered in the Model. - """ - - try: - from pymc.model import Model - - model = Model.get_context() - except TypeError: - raise TypeError( - "No model on context stack, which is needed to " - "instantiate distributions. Add variable inside " - "a 'with model:' block, or use the '.dist' syntax " - "for a standalone distribution." - ) - - if "testval" in kwargs: - initval = kwargs.pop("testval") - warnings.warn( - "The `testval` argument is deprecated; use `initval`.", - FutureWarning, - stacklevel=2, - ) - - if not isinstance(name, string_types): - raise TypeError(f"Name needs to be a string but got: {name}") - - dims = convert_dims(dims) - if observed is not None: - observed = convert_observed_data(observed) - - # Create the RV, without taking `dims` into consideration - rv_out, resize_shape_from_dims = _make_rv_and_resize_shape_from_dims( - cls=cls, dims=dims, model=model, observed=observed, args=args, **kwargs - ) - - # Resize variable based on `dims` information - if resize_shape_from_dims: - resize_size_from_dims = find_size( - shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.owner.op.ndim_supp - ) - rv_out = change_dist_size(rv_out, new_size=resize_size_from_dims, expand=False) - - rv_out = model.register_rv( - rv_out, - name, - observed, - total_size, - dims=dims, - transform=transform, - initval=initval, - ) - # add in pretty-printing support - rv_out.str_repr = types.MethodType(str_for_dist, rv_out) - rv_out._repr_latex_ = types.MethodType( - functools.partial(str_for_dist, formatting="latex"), rv_out - ) - - rv_out.logp = _make_nice_attr_error("rv.logp(x)", "pm.logp(rv, x)") - rv_out.logcdf = _make_nice_attr_error("rv.logcdf(x)", "pm.logcdf(rv, x)") - rv_out.random = _make_nice_attr_error("rv.random()", "pm.draw(rv)") - - return rv_out - - @classmethod - def dist( - cls, - dist_params, - *, - shape: Optional[Shape] = None, - size: Optional[Size] = None, - **kwargs, - ) -> TensorVariable: - """Creates a TensorVariable corresponding to the `cls` symbolic distribution. - - Parameters - ---------- - dist_params : array-like - The inputs to the `RandomVariable` `Op`. - shape : int, tuple, Variable, optional - A tuple of sizes for each dimension of the new RV. - size : int, tuple, Variable, optional - For creating the RV like in Aesara/NumPy. - - Returns - ------- - var : TensorVariable - """ - - if "testval" in kwargs: - kwargs.pop("testval") - warnings.warn( - "The `.dist(testval=...)` argument is deprecated and has no effect. " - "Initial values for sampling/optimization can be specified with `initval` in a modelcontext. " - "For using Aesara's test value features, you must assign the `.tag.test_value` yourself.", - FutureWarning, - stacklevel=2, - ) - if "initval" in kwargs: - raise TypeError( - "Unexpected keyword argument `initval`. " - "This argument is not available for the `.dist()` API." - ) - - if "dims" in kwargs: - raise NotImplementedError("The use of a `.dist(dims=...)` API is not supported.") - if shape is not None and size is not None: - raise ValueError( - f"Passing both `shape` ({shape}) and `size` ({size}) is not supported!" 
- ) - - shape = convert_shape(shape) - size = convert_size(size) - - ndim_supp = cls.ndim_supp(*dist_params) + # SymbolicRVs don't have `ndim_supp` until they are created + ndim_supp = getattr(cls.rv_op, "ndim_supp", None) + if ndim_supp is None: + ndim_supp = cls.rv_op(*dist_params, **kwargs).owner.op.ndim_supp create_size = find_size(shape=shape, size=size, ndim_supp=ndim_supp) rv_out = cls.rv_op(*dist_params, size=create_size, **kwargs) diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index 5fddb54c92..bb28fb4763 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -27,7 +27,7 @@ from pymc.distributions.continuous import Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters from pymc.distributions.distribution import ( - SymbolicDistribution, + Distribution, SymbolicRandomVariable, _moment, moment, @@ -52,7 +52,7 @@ def update(self, node: Node): return {node.inputs[0]: node.outputs[0]} -class Mixture(SymbolicDistribution): +class Mixture(Distribution): R""" Mixture log-likelihood @@ -161,6 +161,8 @@ class Mixture(SymbolicDistribution): like = pm.Mixture('like', w=w, comp_dists=components, observed=data) """ + rv_type = MarginalMixtureRV + @classmethod def dist(cls, w, comp_dists, **kwargs): if not isinstance(comp_dists, (tuple, list)): @@ -205,11 +207,6 @@ def dist(cls, w, comp_dists, **kwargs): w = at.as_tensor_variable(w) return super().dist([w, *comp_dists], **kwargs) - @classmethod - def ndim_supp(cls, weights, *components): - # We already checked that all components have the same support dimensionality - return components[0].owner.op.ndim_supp - @classmethod def rv_op(cls, weights, *components, size=None): # Create new rng for the mix_indexes internal RV diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 9ac2db3e40..aec9776605 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -34,7 +34,7 @@ from pymc.distributions.continuous import Flat, Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters from pymc.distributions.distribution import ( - SymbolicDistribution, + Distribution, SymbolicRandomVariable, _moment, moment, @@ -332,6 +332,7 @@ def logp( class AutoRegressiveRV(SymbolicRandomVariable): """A placeholder used to specify a log-likelihood for an AR sub-graph.""" + _print_name = ("AR", "\\operatorname{AR}") default_output = 1 ar_order: int constant_term: bool @@ -348,7 +349,7 @@ def update(self, node: Node): return {node.inputs[-1]: node.outputs[0]} -class AR(SymbolicDistribution): +class AR(Distribution): r"""Autoregressive process with p lags. .. 
math:: @@ -408,6 +409,8 @@ class AR(SymbolicDistribution): """ + rv_type = AutoRegressiveRV + def __new__(cls, name, rho, *args, steps=None, constant=False, ar_order=None, **kwargs): rhos = at.atleast_1d(at.as_tensor_variable(floatX(rho))) ar_order = cls._get_ar_order(rhos=rhos, constant=constant, ar_order=ar_order) @@ -527,7 +530,7 @@ def rv_op(cls, rhos, sigma, init_dist, steps, ar_order, constant_term, size=None else: # In this case the size of the init_dist depends on the parameters shape # The last dimension of rho and init_dist does not matter - batch_size = at.broadcast_shape(sigma, rhos[..., 0], init_dist[..., 0]) + batch_size = at.broadcast_shape(sigma, rhos[..., 0], at.atleast_1d(init_dist)[..., 0]) if init_dist.owner.op.ndim_supp == 0: init_dist_size = (*batch_size, ar_order) else: diff --git a/pymc/tests/distributions/test_distribution.py b/pymc/tests/distributions/test_distribution.py index 460df4e912..0cea2916f5 100644 --- a/pymc/tests/distributions/test_distribution.py +++ b/pymc/tests/distributions/test_distribution.py @@ -94,7 +94,7 @@ def test_all_distributions_have_moments(): dists = (getattr(dist_module, dist) for dist in dist_module.__all__) dists = (dist for dist in dists if isinstance(dist, DistributionMeta)) missing_moments = { - dist for dist in dists if type(getattr(dist, "rv_op", None)) not in _moment.registry + dist for dist in dists if getattr(dist, "rv_type", None) not in _moment.registry } # Ignore super classes From 08f5d45736cb769be613475195cb3fa966a9d70f Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 30 Aug 2022 16:27:04 +0200 Subject: [PATCH 10/13] Convert _LKJCholeskyCov into a SymbolicRandomVariable This is necessary because the distribution is in fact a "Factory distribution" that needs to be able to resize `sd_dist` dynamically in order to work correctly. 
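Concretely, the packed Cholesky draw has support shape `n * (n + 1) // 2`, and a resize must propagate to `sd_dist` so that every diagonal element gets its own independent draw. A rough sketch of the intended behavior, mirroring the test added in this patch (shapes and parameter values are only illustrative):

```python
import pymc as pm
from pymc.distributions.shape_utils import change_dist_size

# n=3 packs the Cholesky factor into 3 * 4 // 2 = 6 entries
x = pm.LKJCholeskyCov.dist(
    n=3, eta=1, sd_dist=pm.Exponential.dist(1), size=(5,), compute_corr=False
)
assert pm.draw(x).shape == (5, 6)

# Resizing rebuilds the graph, resizing the underlying sd_dist along the way
y = change_dist_size(x, new_size=(10,))
assert pm.draw(y).shape == (10, 6)
```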
--- pymc/distributions/multivariate.py | 200 ++++++++++-------- pymc/tests/distributions/test_multivariate.py | 28 ++- 2 files changed, 138 insertions(+), 90 deletions(-) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 82dd7d5281..352494e395 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -24,6 +24,7 @@ import numpy as np import scipy +from aeppl.logprob import _logprob from aesara.graph.basic import Apply, Constant, Variable from aesara.graph.op import Op from aesara.raise_op import Assert @@ -32,7 +33,7 @@ from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace from aesara.tensor.random.basic import dirichlet, multinomial, multivariate_normal from aesara.tensor.random.op import RandomVariable, default_supp_shape_from_params -from aesara.tensor.random.utils import broadcast_params, normalize_size_param +from aesara.tensor.random.utils import broadcast_params from aesara.tensor.slinalg import Cholesky, SolveTriangular from aesara.tensor.type import TensorType from scipy import linalg, stats @@ -49,9 +50,17 @@ logpow, multigammaln, ) -from pymc.distributions.distribution import Continuous, Discrete, moment +from pymc.distributions.distribution import ( + Continuous, + Discrete, + Distribution, + SymbolicRandomVariable, + _moment, + moment, +) from pymc.distributions.logprob import ignore_logprob from pymc.distributions.shape_utils import ( + _change_dist_size, broadcast_dist_samples_to, change_dist_size, rv_size_is_none, @@ -1097,12 +1106,12 @@ def _lkj_normalizing_constant(eta, n): return result -class _LKJCholeskyCovRV(RandomVariable): - name = "_lkjcholeskycov" +class _LKJCholeskyCovBaseRV(RandomVariable): + name = "_lkjcholeskycovbase" ndim_supp = 1 ndims_params = [0, 0, 1] dtype = "floatX" - _print_name = ("_lkjcholeskycov", "\\operatorname{_lkjcholeskycov}") + _print_name = ("_lkjcholeskycovbase", "\\operatorname{_lkjcholeskycovbase}") def make_node(self, rng, size, dtype, n, eta, D): n = at.as_tensor_variable(n) @@ -1115,35 +1124,19 @@ def make_node(self, rng, size, dtype, n, eta, D): D = at.as_tensor_variable(D) - # We resize the sd_dist `D` automatically so that it has (size x n) independent - # draws which is what the `_LKJCholeskyCovRV.rng_fn` expects. This makes the - # random and logp methods equivalent, as the latter also assumes a unique value - # for each diagonal element. - # Since `eta` and `n` are forced to be scalars we don't need to worry about - # implied batched dimensions for the time being. 
- size = normalize_size_param(size) - if D.owner.op.ndim_supp == 0: - D = change_dist_size(D, at.concatenate((size, (n,)))) - else: - # The support shape must be `n` but we have no way of controlling it - D = change_dist_size(D, size) - return super().make_node(rng, size, dtype, n, eta, D) - def _infer_shape(self, size, dist_params, param_shapes=None): + def _supp_shape_from_params(self, dist_params, param_shapes): n = dist_params[0] - dist_shape = tuple(size) + ((n * (n + 1)) // 2,) - return dist_shape + return ((n * (n + 1)) // 2,) def rng_fn(self, rng, n, eta, D, size): # We flatten the size to make operations easier, and then rebuild it if size is None: - flat_size = 1 - else: - flat_size = np.prod(size) - - C = LKJCorrRV._random_corr_matrix(rng, n, eta, flat_size) + size = D.shape[:-1] + flat_size = np.prod(size).astype(int) + C = LKJCorrRV._random_corr_matrix(rng=rng, n=n, eta=eta, flat_size=flat_size) D = D.reshape(flat_size, n) C *= D[..., :, np.newaxis] * D[..., np.newaxis, :] @@ -1159,23 +1152,30 @@ def rng_fn(self, rng, n, eta, D, size): return samples -_ljk_cholesky_cov = _LKJCholeskyCovRV() +_ljk_cholesky_cov_base = _LKJCholeskyCovBaseRV() -class _LKJCholeskyCov(Continuous): +# _LKJCholeskyCovBaseRV requires a properly shaped `D`, which means the variable can't +# be safely resized. Because of this, we add the thin SymbolicRandomVariable wrapper +class _LKJCholeskyCovRV(SymbolicRandomVariable): + default_output = 1 + _print_name = ("_lkjcholeskycov", "\\operatorname{_lkjcholeskycov}") + + def update(self, node): + return {node.inputs[0]: node.outputs[0]} + + +class _LKJCholeskyCov(Distribution): r"""Underlying class for covariance matrix with LKJ distributed correlations. See docs for LKJCholeskyCov function for more details on how to use it in models. """ - rv_op = _ljk_cholesky_cov - def __new__(cls, name, eta, n, sd_dist, **kwargs): - check_dist_not_registered(sd_dist) - return super().__new__(cls, name, eta, n, sd_dist, **kwargs) + rv_type = _LKJCholeskyCovRV @classmethod - def dist(cls, eta, n, sd_dist, **kwargs): - eta = at.as_tensor_variable(floatX(eta)) + def dist(cls, n, eta, sd_dist, **kwargs): n = at.as_tensor_variable(intX(n)) + eta = at.as_tensor_variable(floatX(eta)) if not ( isinstance(sd_dist, Variable) @@ -1185,75 +1185,105 @@ def dist(cls, eta, n, sd_dist, **kwargs): ): raise TypeError("sd_dist must be a scalar or vector distribution variable") + check_dist_not_registered(sd_dist) # sd_dist is part of the generative graph, but should be completely ignored # by the logp graph, since the LKJ logp explicitly includes these terms. - # TODO: Things could be simplified a bit if we managed to extract the - # sd_dist prior components from the logp expression. sd_dist = ignore_logprob(sd_dist) - return super().dist([n, eta, sd_dist], **kwargs) - def moment(rv, size, n, eta, sd_dists): - diag_idxs = (at.cumsum(at.arange(1, n + 1)) - 1).astype("int32") - moment = at.zeros_like(rv) - moment = at.set_subtensor(moment[..., diag_idxs], 1) - return moment + @classmethod + def rv_op(cls, n, eta, sd_dist, size=None): + # We resize the sd_dist automatically so that it has (size x n) independent + # draws which is what the `_LKJCholeskyCovBaseRV.rng_fn` expects. This makes the + # random and logp methods equivalent, as the latter also assumes a unique value + # for each diagonal element. + # Since `eta` and `n` are forced to be scalars we don't need to worry about + # implied batched dimensions from those for the time being. 
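+        # Illustration (shapes here are hypothetical): with n=3 and size=(10,),
+        # a scalar sd_dist such as Exponential.dist(1) is resized to draws of
+        # shape (10, 3), one standard deviation per diagonal entry, while a
+        # distribution whose support shape is already (3,) (e.g. Dirichlet) is
+        # only resized over the batch dimensions (10,).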
+ if size is None: + size = sd_dist.shape[:-1] + shape = tuple(size) + (n,) + if sd_dist.owner.op.ndim_supp == 0: + sd_dist = change_dist_size(sd_dist, shape) + else: + # The support shape must be `n` but we have no way of controlling it + sd_dist = change_dist_size(sd_dist, shape[:-1]) - def logp(value, n, eta, sd_dist): - """ - Calculate log-probability of Covariance matrix with LKJ - distributed correlations at specified value. + # Create new rng for the _lkjcholeskycov internal RV + rng = aesara.shared(np.random.default_rng()) - Parameters - ---------- - value: numeric - Value for which log-probability is calculated. + rng_, n_, eta_, sd_dist_ = rng.type(), n.type(), eta.type(), sd_dist.type() + next_rng_, lkjcov_ = _ljk_cholesky_cov_base(n_, eta_, sd_dist_, rng=rng_).owner.outputs - Returns - ------- - TensorVariable - """ + return _LKJCholeskyCovRV( + inputs=[rng_, n_, eta_, sd_dist_], + outputs=[next_rng_, lkjcov_], + ndim_supp=1, + )(rng, n, eta, sd_dist) - if value.ndim > 1: - raise ValueError("LKJCholeskyCov logp is only implemented for vector values (ndim=1)") - diag_idxs = at.cumsum(at.arange(1, n + 1)) - 1 - cumsum = at.cumsum(value**2) - variance = at.zeros(at.atleast_1d(n)) - variance = at.inc_subtensor(variance[0], value[0] ** 2) - variance = at.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) - sd_vals = at.sqrt(variance) +@_change_dist_size.register(_LKJCholeskyCovRV) +def change_LKJCholeksyCovRV_size(op, dist, new_size, expand=False): + n, eta, sd_dist = dist.owner.inputs[1:] - logp_sd = pm.logp(sd_dist, sd_vals).sum() - corr_diag = value[diag_idxs] / sd_vals + if expand: + old_size = sd_dist.shape[:-1] + new_size = tuple(new_size) + tuple(old_size) - logp_lkj = (2 * eta - 3 + n - at.arange(n)) * at.log(corr_diag) - logp_lkj = at.sum(logp_lkj) + return _LKJCholeskyCov.rv_op(n, eta, sd_dist, size=new_size) - # Compute the log det jacobian of the second transformation - # described in the docstring. 
- idx = at.arange(n) - det_invjac = at.log(corr_diag) - idx * at.log(sd_vals) - det_invjac = det_invjac.sum() - # TODO: _lkj_normalizing_constant currently requires `eta` and `n` to be constants - if not isinstance(n, Constant): - raise NotImplementedError("logp only implemented for constant `n`") - n = int(n.data) +@_moment.register(_LKJCholeskyCovRV) +def _LKJCholeksyCovRV_moment(op, rv, rng, n, eta, sd_dist): + diag_idxs = (at.cumsum(at.arange(1, n + 1)) - 1).astype("int32") + moment = at.zeros_like(rv) + moment = at.set_subtensor(moment[..., diag_idxs], 1) + return moment - if not isinstance(eta, Constant): - raise NotImplementedError("logp only implemented for constant `eta`") - eta = float(eta.data) - norm = _lkj_normalizing_constant(eta, n) +@_default_transform.register(_LKJCholeskyCovRV) +def _LKJCholeksyCovRV_default_transform(op, rv): + _, n, _, _ = rv.owner.inputs + return transforms.CholeskyCovPacked(n) - return norm + logp_lkj + logp_sd + det_invjac +@_logprob.register(_LKJCholeskyCovRV) +def _LKJCholeksyCovRV_logp(op, values, rng, n, eta, sd_dist, **kwargs): + (value,) = values -@_default_transform.register(_LKJCholeskyCov) -def lkjcholeskycov_default_transform(op, rv): - _, _, _, n, _, _ = rv.owner.inputs - return transforms.CholeskyCovPacked(n) + if value.ndim > 1: + raise ValueError("_LKJCholeskyCov logp is only implemented for vector values (ndim=1)") + + diag_idxs = at.cumsum(at.arange(1, n + 1)) - 1 + cumsum = at.cumsum(value**2) + variance = at.zeros(at.atleast_1d(n)) + variance = at.inc_subtensor(variance[0], value[0] ** 2) + variance = at.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) + sd_vals = at.sqrt(variance) + + logp_sd = pm.logp(sd_dist, sd_vals).sum() + corr_diag = value[diag_idxs] / sd_vals + + logp_lkj = (2 * eta - 3 + n - at.arange(n)) * at.log(corr_diag) + logp_lkj = at.sum(logp_lkj) + + # Compute the log det jacobian of the second transformation + # described in the docstring. 
+ idx = at.arange(n) + det_invjac = at.log(corr_diag) - idx * at.log(sd_vals) + det_invjac = det_invjac.sum() + + # TODO: _lkj_normalizing_constant currently requires `eta` and `n` to be constants + if not isinstance(n, Constant): + raise NotImplementedError("logp only implemented for constant `n`") + n = int(n.data) + + if not isinstance(eta, Constant): + raise NotImplementedError("logp only implemented for constant `eta`") + eta = float(eta.data) + + norm = _lkj_normalizing_constant(eta, n) + + return norm + logp_lkj + logp_sd + det_invjac class LKJCholeskyCov: @@ -1462,7 +1492,7 @@ def rng_fn(cls, rng, n, eta, size): else: flat_size = np.prod(size) - C = cls._random_corr_matrix(rng, n, eta, flat_size) + C = cls._random_corr_matrix(rng=rng, n=n, eta=eta, flat_size=flat_size) triu_idx = np.triu_indices(n, k=1) samples = C[..., triu_idx[0], triu_idx[1]] diff --git a/pymc/tests/distributions/test_multivariate.py b/pymc/tests/distributions/test_multivariate.py index 5cd734bdf0..76052513df 100644 --- a/pymc/tests/distributions/test_multivariate.py +++ b/pymc/tests/distributions/test_multivariate.py @@ -38,8 +38,9 @@ _OrderedMultinomial, quaddist_matrix, ) -from pymc.distributions.shape_utils import to_tuple +from pymc.distributions.shape_utils import change_dist_size, to_tuple from pymc.math import kronecker +from pymc.sampling import draw from pymc.tests.distributions.util import ( BaseTestDistributionRandom, Domain, @@ -881,6 +882,18 @@ def test_sd_dist_automatically_resized(self, sd_dist, size, shape): # LKJCov has support shape `(n * (n+1)) // 2` assert x.eval().shape == (10, 6) + def test_change_dist_size(self): + x1 = pm.LKJCholeskyCov.dist( + n=3, eta=1, sd_dist=pm.Dirichlet.dist(np.ones(3)), size=(5,), compute_corr=False + ) + x2 = change_dist_size(x1, new_size=(10, 3), expand=False) + x3 = change_dist_size(x2, new_size=(3,), expand=True) + + draw_x1, draw_x2, draw_x3 = pm.draw([x1, x2, x3]) + assert draw_x1.shape == (5, 6) + assert draw_x2.shape == (10, 3, 6) + assert draw_x3.shape == (3, 10, 3, 6) + # Used for MvStudentT moment test rand1d = np.random.rand(2) @@ -1783,7 +1796,7 @@ def ref_rand(size, n, eta): class TestLKJCholeskyCov(BaseTestDistributionRandom): pymc_dist = _LKJCholeskyCov - pymc_dist_params = {"eta": 1.0, "n": 3, "sd_dist": pm.DiracDelta.dist([0.5, 1.0, 2.0])} + pymc_dist_params = {"n": 3, "eta": 1.0, "sd_dist": pm.DiracDelta.dist([0.5, 1.0, 2.0])} expected_rv_op_params = {"n": 3, "eta": 1.0, "sd_dist": pm.DiracDelta.dist([0.5, 1.0, 2.0])} size = None @@ -1803,6 +1816,11 @@ class TestLKJCholeskyCov(BaseTestDistributionRandom): "check_draws_match_expected", ] + def _instantiate_pymc_rv(self, dist_params=None): + # RNG cannot be passed through the PyMC class + params = dist_params if dist_params else self.pymc_dist_params + self.pymc_rv = self.pymc_dist.dist(**params, size=self.size) + def check_rv_size(self): for size, expected in zip(self.sizes_to_check, self.sizes_expected): sd_dist = pm.Exponential.dist(1, size=(*to_tuple(size), 3)) @@ -1813,9 +1831,9 @@ def check_rv_size(self): def check_draws_match_expected(self): # TODO: Find better comparison: - rng = aesara.shared(self.get_random_state(reset=True)) - x = _LKJCholeskyCov.dist(n=2, eta=10_000, sd_dist=pm.DiracDelta.dist([0.5, 2.0]), rng=rng) - assert np.all(np.abs(x.eval() - np.array([0.5, 0, 2.0])) < 0.01) + rng = self.get_random_state(reset=True) + x = _LKJCholeskyCov.dist(n=2, eta=10_000, sd_dist=pm.DiracDelta.dist([0.5, 2.0])) + assert np.all(np.abs(draw(x, random_seed=rng) - np.array([0.5, 0, 2.0])) < 
0.01) @pytest.mark.parametrize("sparse", [True, False]) From 6b5f33a913940b845be58ada11527dc9e7765d48 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 30 Aug 2022 20:22:35 +0200 Subject: [PATCH 11/13] Convert GaussianRandomWalk into a SymbolicRandomVariable * Implements distribution agnostic univariate RandomWalk --- pymc/distributions/__init__.py | 2 + pymc/distributions/logprob.py | 56 +++- pymc/distributions/timeseries.py | 281 +++++++++----------- pymc/tests/distributions/test_logprob.py | 14 + pymc/tests/distributions/test_timeseries.py | 57 +--- 5 files changed, 211 insertions(+), 199 deletions(-) diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index 0a17fb45e1..c4128b4920 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -109,6 +109,7 @@ GaussianRandomWalk, MvGaussianRandomWalk, MvStudentTRandomWalk, + RandomWalk, ) __all__ = [ @@ -173,6 +174,7 @@ "LKJCholeskyCov", "LKJCorr", "AsymmetricLaplace", + "RandomWalk", "GaussianRandomWalk", "MvGaussianRandomWalk", "MvStudentTRandomWalk", diff --git a/pymc/distributions/logprob.py b/pymc/distributions/logprob.py index a75d2b973d..7e0114a8cc 100644 --- a/pymc/distributions/logprob.py +++ b/pymc/distributions/logprob.py @@ -17,16 +17,20 @@ from typing import Dict, List, Optional, Sequence, Union import aesara -import aesara.tensor as at import numpy as np -from aeppl import factorized_joint_logprob +from aeppl import factorized_joint_logprob, logprob from aeppl.abstract import assign_custom_measurable_outputs +from aeppl.logprob import _logprob from aeppl.logprob import logcdf as logcdf_aeppl from aeppl.logprob import logprob as logp_aeppl +from aeppl.tensor import MeasurableJoin from aeppl.transforms import TransformValuesRewrite +from aesara import tensor as at +from aesara.graph import FunctionGraph, rewrite_graph from aesara.graph.basic import graph_inputs, io_toposort from aesara.tensor.random.op import RandomVariable +from aesara.tensor.rewriting.basic import ShapeFeature, topo_constant_folding from aesara.tensor.subtensor import ( AdvancedIncSubtensor, AdvancedIncSubtensor1, @@ -320,3 +324,51 @@ def ignore_logprob(rv: TensorVariable) -> TensorVariable: return rv new_node = assign_custom_measurable_outputs(node, type_prefix=prefix) return new_node.outputs[node.outputs.index(rv)] + + +@_logprob.register(MeasurableJoin) +def logprob_join_constant_shapes(op, values, axis, *base_vars, **kwargs): + """Compute the log-likelihood graph for a `Join`. 
+ + This overrides the implementation in Aeppl, to constant fold the shapes + of the base vars so that RandomVariables do not show up in the logp graph, + which is a requirement enforced by `pymc.distributions.logprob.joint_logp` + """ + (value,) = values + + base_var_shapes = [base_var.shape[axis] for base_var in base_vars] + + shape_fg = FunctionGraph( + outputs=base_var_shapes, + features=[ShapeFeature()], + clone=True, + ) + base_var_shapes = rewrite_graph(shape_fg, custom_opt=topo_constant_folding).outputs + + split_values = at.split( + value, + splits_size=[base_var_shape for base_var_shape in base_var_shapes], + n_splits=len(base_vars), + axis=axis, + ) + + logps = [ + logprob(base_var, split_value) for base_var, split_value in zip(base_vars, split_values) + ] + + if len({logp.ndim for logp in logps}) != 1: + raise ValueError( + "Joined logps have different number of dimensions, this can happen when " + "joining univariate and multivariate distributions", + ) + + base_vars_ndim_supp = split_values[0].ndim - logps[0].ndim + join_logprob = at.concatenate( + [ + at.atleast_1d(logprob(base_var, split_value)) + for base_var, split_value in zip(base_vars, split_values) + ], + axis=axis - base_vars_ndim_supp, + ) + + return join_logprob diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index aec9776605..42b3e156be 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -13,26 +13,25 @@ # limitations under the License. import warnings -from typing import Any, Optional, Tuple, Union +from typing import Any, Optional, Union import aesara import aesara.tensor as at import numpy as np +from aeppl.abstract import _get_measurable_outputs from aeppl.logprob import _logprob from aesara import scan from aesara.graph import FunctionGraph, rewrite_graph -from aesara.graph.basic import Node +from aesara.graph.basic import Node, clone_replace from aesara.raise_op import Assert from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable -from aesara.tensor.random.utils import normalize_size_param from aesara.tensor.rewriting.basic import ShapeFeature, topo_constant_folding from pymc.aesaraf import convert_observed_data, floatX, intX from pymc.distributions import distribution, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma -from pymc.distributions.dist_math import check_parameters from pymc.distributions.distribution import ( Distribution, SymbolicRandomVariable, @@ -46,13 +45,13 @@ _change_dist_size, change_dist_size, convert_dims, - rv_size_is_none, to_tuple, ) from pymc.model import modelcontext from pymc.util import check_dist_not_registered __all__ = [ + "RandomWalk", "GaussianRandomWalk", "MvGaussianRandomWalk", "MvStudentTRandomWalk", @@ -117,109 +116,133 @@ def get_steps( return inferred_steps -class GaussianRandomWalkRV(RandomVariable): - """ - GaussianRandomWalk Random Variable - """ +class RandomWalkRV(SymbolicRandomVariable): + """RandomWalk Variable""" - name = "GaussianRandomWalk" - ndim_supp = 1 - ndims_params = [0, 0, 0, 0] - dtype = "floatX" - _print_name = ("GaussianRandomWalk", "\\operatorname{GaussianRandomWalk}") + default_output = 0 + _print_name = ("RandomWalk", "\\operatorname{RandomWalk}") - def make_node(self, rng, size, dtype, mu, sigma, init_dist, steps): - steps = at.as_tensor_variable(steps) - if not steps.ndim == 0 or not steps.dtype.startswith("int"): - raise ValueError("steps must be an integer scalar (ndim=0).") - mu = 
at.as_tensor_variable(mu)
-        sigma = at.as_tensor_variable(sigma)
-        init_dist = at.as_tensor_variable(init_dist)
+class RandomWalk(Distribution):
+    r"""RandomWalk Distribution
 
-        # Resize init distribution
-        size = normalize_size_param(size)
-        # If not explicit, size is determined by the shapes of mu, sigma, and init
-        init_dist_size = (
-            size if not rv_size_is_none(size) else at.broadcast_shape(mu, sigma, init_dist)
-        )
-        init_dist = change_dist_size(init_dist, init_dist_size)
+    TODO: Expand docstrings
+    """
 
-        return super().make_node(rng, size, dtype, mu, sigma, init_dist, steps)
+    rv_type = RandomWalkRV
 
-    def _supp_shape_from_params(self, dist_params, reop_param_idx=0, param_shapes=None):
-        steps = dist_params[3]
+    def __new__(cls, *args, steps=None, **kwargs):
+        steps = get_steps(
+            steps=steps,
+            shape=None,  # Shape will be checked in `cls.dist`
+            dims=kwargs.get("dims", None),
+            observed=kwargs.get("observed", None),
+            step_shape_offset=1,
+        )
+        return super().__new__(cls, *args, steps=steps, **kwargs)
 
-        return (steps + 1,)
+    @classmethod
+    def dist(cls, init_dist, innovation_dist, steps=None, **kwargs) -> at.TensorVariable:
+        steps = get_steps(
+            steps=steps,
+            shape=kwargs.get("shape"),
+            step_shape_offset=1,
+        )
+        if steps is None:
+            raise ValueError("Must specify steps or shape parameter")
+        steps = at.as_tensor_variable(intX(steps))
+
+        if not (
+            isinstance(init_dist, at.TensorVariable)
+            and init_dist.owner is not None
+            and isinstance(init_dist.owner.op, RandomVariable)
+            # TODO: Lift univariate constraint on init_dist
+            and init_dist.owner.op.ndim_supp == 0
+        ):
+            raise TypeError("init_dist must be a univariate distribution variable")
+        check_dist_not_registered(init_dist)
+
+        if not (
+            isinstance(innovation_dist, at.TensorVariable)
+            and innovation_dist.owner is not None
+            and isinstance(innovation_dist.owner.op, RandomVariable)
+            # TODO: Lift univariate constraint on innovation_dist
+            and innovation_dist.owner.op.ndim_supp == 0
+        ):
+            raise TypeError("innovation_dist must be a univariate distribution variable")
+        check_dist_not_registered(innovation_dist)
+
+        return super().dist([init_dist, innovation_dist, steps], **kwargs)
 
     @classmethod
-    def rng_fn(
-        cls,
-        rng: np.random.RandomState,
-        mu: Union[np.ndarray, float],
-        sigma: Union[np.ndarray, float],
-        init_dist: Union[np.ndarray, float],
-        steps: int,
-        size: Tuple[int],
-    ) -> np.ndarray:
-        """Gaussian Random Walk generator.
-
-        The init value is drawn from the Normal distribution with the same sigma as the
-        innovations.
-
-        Notes
-        -----
-        Currently does not support custom init distribution
-
-        Parameters
-        ----------
-        rng: np.random.RandomState
-            Numpy random number generator
-        mu: array_like of float
-            Random walk mean
-        sigma: array_like of float
-            Standard deviation of innovation (sigma > 0)
-        init_dist: array_like of float
-            Initialization value for GaussianRandomWalk
-        steps: int
-            Length of random walk, must be greater than 1.
Returned array will be of size+1 to
-            account as first value is initial value
-        size: tuple of int
-            The number of Random Walk time series generated
+    def rv_op(cls, init_dist, innovation_dist, steps, size=None):
+        # If not explicit, size is determined by the shapes of the input distributions
+        if size is None:
+            size = at.broadcast_shape(init_dist, innovation_dist[..., 0])
+        innovation_size = tuple(size) + (steps,)
+
+        # Resize input distributions
+        init_dist = change_dist_size(init_dist, size)
+        innovation_dist = change_dist_size(innovation_dist, innovation_size)
+
+        # Create SymbolicRV
+        init_dist_, innovation_dist_, steps_ = (
+            init_dist.type(),
+            innovation_dist.type(),
+            steps.type(),
+        )
+        grw_ = at.concatenate([init_dist_[..., None], innovation_dist_], axis=-1)
+        grw_ = at.cumsum(grw_, axis=-1)
+        return RandomWalkRV(
+            [init_dist_, innovation_dist_, steps_],
+            # We pass steps_ through just so we can keep a reference to it, even though
+            # it's no longer needed at this point
+            [grw_, steps_],
+            ndim_supp=1,
+        )(init_dist, innovation_dist, steps)
 
-        Returns
-        -------
-        ndarray
-        """
-        if steps < 1:
-            raise ValueError("Steps must be greater than 0")
 
+@_get_measurable_outputs.register(RandomWalkRV)
+def _get_measurable_outputs_random_walk(op, node):
+    # Ignore steps output
+    return [node.default_output()]
 
-        # If size is None then the returned series should be (*implied_dims, 1+steps)
-        if size is None:
-            # broadcast parameters with each other to find implied dims
-            bcast_shape = np.broadcast_shapes(
-                np.asarray(mu).shape,
-                np.asarray(sigma).shape,
-                np.asarray(init_dist).shape,
-            )
-            dist_shape = (*bcast_shape, int(steps))
 
+@_change_dist_size.register(RandomWalkRV)
+def change_random_walk_size(op, dist, new_size, expand):
+    init_dist, innovation_dist, steps = dist.owner.inputs
+    if expand:
+        old_size = init_dist.shape
+        new_size = tuple(new_size) + tuple(old_size)
+    return RandomWalk.rv_op(init_dist, innovation_dist, steps, size=new_size)
 
-        # If size is None then the returned series should be (*size, 1+steps)
-        else:
-            dist_shape = (*size, int(steps))
+@_moment.register(RandomWalkRV)
+def random_walk_moment(op, rv, init_dist, innovation_dist, steps):
+    grw_moment = at.zeros_like(rv)
+    grw_moment = at.set_subtensor(grw_moment[..., 0], moment(init_dist))
+    grw_moment = at.set_subtensor(grw_moment[..., 1:], moment(innovation_dist))
+    return at.cumsum(grw_moment, axis=-1)
 
-        # Add one dimension to the right, so that mu and sigma broadcast safely along
-        # the steps dimension
-        innovations = rng.normal(loc=mu[..., None], scale=sigma[..., None], size=dist_shape)
-        grw = np.concatenate([init_dist[..., None], innovations], axis=-1)
-        return np.cumsum(grw, axis=-1)
 
+@_logprob.register(RandomWalkRV)
+def random_walk_logp(op, values, *inputs, **kwargs):
+    # Although Aeppl can derive the logprob of random walks, it does not collapse
+    # what PyMC considers the core dimension of steps. We do it manually here.
+    (value,) = values
+    # Recreate RV and obtain inner graph
+    rv_node = op.make_node(*inputs)
+    rv = clone_replace(
+        op.inner_outputs, replace={u: v for u, v in zip(op.inner_inputs, rv_node.inputs)}
+    )[op.default_output]
+    # Obtain logp via Aeppl of inner graph and collapse steps dimension
+    return logp(rv, value).sum(axis=-1)
 
-gaussianrandomwalk = GaussianRandomWalkRV()
 
-
-class GaussianRandomWalk(distribution.Continuous):
+class GaussianRandomWalk:
     r"""Random Walk with Normal innovations.
 
     Parameters
     ----------
     mu : tensor_like of float
         innovation drift
     sigma : tensor_like of float
         sigma > 0, innovation standard deviation
     init_dist : unnamed distribution
         Univariate distribution of the initial value
     steps : int
         Number of steps in Gaussian Random Walk (steps > 0). Only needed if shape is not
         provided.
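
    Examples
    --------
    A minimal usage sketch (all parameter values below are only illustrative):

    .. code-block:: python

        with pm.Model():
            # 10 innovation steps -> 11 values, including the initial point
            grw = pm.GaussianRandomWalk(
                "grw", mu=0.0, sigma=1.0, steps=10, init_dist=pm.Normal.dist(0, 100)
            )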
""" - rv_op = gaussianrandomwalk - - def __new__(cls, *args, steps=None, **kwargs): - steps = get_steps( - steps=steps, - shape=None, # Shape will be checked in `cls.dist` - dims=kwargs.get("dims", None), - observed=kwargs.get("observed", None), - step_shape_offset=1, + def __new__(cls, name, mu=0.0, sigma=1.0, *, init_dist=None, steps=None, **kwargs): + init_dist, innovation_dist, kwargs = cls.get_dists( + mu=mu, sigma=sigma, init_dist=init_dist, **kwargs + ) + return RandomWalk( + name, init_dist=init_dist, innovation_dist=innovation_dist, steps=steps, **kwargs ) - return super().__new__(cls, *args, steps=steps, **kwargs) @classmethod def dist(cls, mu=0.0, sigma=1.0, *, init_dist=None, steps=None, **kwargs) -> at.TensorVariable: - - mu = at.as_tensor_variable(floatX(mu)) - sigma = at.as_tensor_variable(floatX(sigma)) - - steps = get_steps( - steps=steps, - shape=kwargs.get("shape"), - step_shape_offset=1, + init_dist, innovation_dist, kwargs = cls.get_dists( + mu=mu, sigma=sigma, init_dist=init_dist, **kwargs + ) + return RandomWalk.dist( + init_dist=init_dist, innovation_dist=innovation_dist, steps=steps, **kwargs ) - if steps is None: - raise ValueError("Must specify steps or shape parameter") - steps = at.as_tensor_variable(intX(steps)) + @classmethod + def get_dists(cls, *, mu, sigma, init_dist, **kwargs): if "init" in kwargs: warnings.warn( "init parameter is now called init_dist. Using init will raise an error in a future release.", @@ -281,52 +296,14 @@ def dist(cls, mu=0.0, sigma=1.0, *, init_dist=None, steps=None, **kwargs) -> at. UserWarning, ) init_dist = Normal.dist(0, 100) - else: - if not ( - isinstance(init_dist, at.TensorVariable) - and init_dist.owner is not None - and isinstance(init_dist.owner.op, RandomVariable) - and init_dist.owner.op.ndim_supp == 0 - ): - raise TypeError("init must be a univariate distribution variable") - check_dist_not_registered(init_dist) - # Ignores logprob of init var because that's accounted for in the logp method - init_dist = ignore_logprob(init_dist) - - return super().dist([mu, sigma, init_dist, steps], **kwargs) - - def moment(rv, size, mu, sigma, init_dist, steps): - grw_moment = at.zeros_like(rv) - grw_moment = at.set_subtensor(grw_moment[..., 0], moment(init_dist)) - # Add one dimension to the right, so that mu broadcasts safely along the steps - # dimension - grw_moment = at.set_subtensor(grw_moment[..., 1:], mu[..., None]) - return at.cumsum(grw_moment, axis=-1) - - def logp( - value: at.Variable, - mu: at.Variable, - sigma: at.Variable, - init_dist: at.Variable, - steps: at.Variable, - ) -> at.TensorVariable: - """Calculate log-probability of Gaussian Random Walk distribution at specified value.""" - - # Calculate initialization logp - init_logp = logp(init_dist, value[..., 0]) - - # Make time series stationary around the mean value - stationary_series = value[..., 1:] - value[..., :-1] # Add one dimension to the right, so that mu and sigma broadcast safely along # the steps dimension - series_logp = logp(Normal.dist(mu[..., None], sigma[..., None]), stationary_series) + mu = at.as_tensor_variable(mu) + sigma = at.as_tensor_variable(sigma) + innovation_dist = Normal.dist(mu=mu[..., None], sigma=sigma[..., None]) - return check_parameters( - init_logp + series_logp.sum(axis=-1), - steps > 0, - msg="steps > 0", - ) + return init_dist, innovation_dist, kwargs class AutoRegressiveRV(SymbolicRandomVariable): diff --git a/pymc/tests/distributions/test_logprob.py b/pymc/tests/distributions/test_logprob.py index 731226423a..26bc0c4ff7 
100644
--- a/pymc/tests/distributions/test_logprob.py
+++ b/pymc/tests/distributions/test_logprob.py
@@ -378,3 +378,17 @@ def test_hierarchical_obs_logp():
     ops = {a.owner.op for a in logp_ancestors if a.owner}
     assert len(ops) > 0
     assert not any(isinstance(o, RandomVariable) for o in ops)
+
+
+def test_logprob_join_constant_shapes():
+    x = at.random.normal(size=5)
+    y = at.random.normal(size=3)
+    xy = at.join(x, y)
+    xy_vv = at.vector("xy_vv")
+
+    xy_logp = logp(xy, xy_vv)
+    # Aeppl alone does not do this: no RandomVariables should remain in the logp graph
+    assert_no_rvs(xy_logp)
+
+    f = aesara.function([xy_vv], xy_logp)
+    np.testing.assert_array_equal(f(np.zeros(8)), sp.norm.logpdf(np.zeros(8)))
diff --git a/pymc/tests/distributions/test_timeseries.py b/pymc/tests/distributions/test_timeseries.py
index 7cafdef191..545a45a20f 100644
--- a/pymc/tests/distributions/test_timeseries.py
+++ b/pymc/tests/distributions/test_timeseries.py
@@ -39,8 +39,6 @@
 from pymc.model import Model
 from pymc.sampling import draw, sample, sample_posterior_predictive
 from pymc.tests.distributions.util import (
-    BaseTestDistributionRandom,
-    Nat,
     R,
     Rplus,
     Vector,
@@ -101,51 +99,20 @@ def test_get_steps(info_source, steps, shape, step_shape_offset, expected_steps,
 
 
 class TestGaussianRandomWalk:
-    class TestGaussianRandomWalkRandom(BaseTestDistributionRandom):
-        # Override default size for test class
-        size = None
-
-        pymc_dist = pm.GaussianRandomWalk
-        pymc_dist_params = {"mu": 1.0, "sigma": 2, "init_dist": pm.DiracDelta.dist(0), "steps": 4}
-        expected_rv_op_params = {
-            "mu": 1.0,
-            "sigma": 2,
-            "init_dist": pm.DiracDelta.dist(0),
-            "steps": 4,
-        }
-
-        checks_to_run = [
-            "check_pymc_params_match_rv_op",
-            "check_rv_inferred_size",
-        ]
-
-        def check_rv_inferred_size(self):
-            steps = self.pymc_dist_params["steps"]
-            sizes_to_check = [None, (), 1, (1,)]
-            sizes_expected = [(steps + 1,), (steps + 1,), (1, steps + 1), (1, steps + 1)]
-
-            for size, expected in zip(sizes_to_check, sizes_expected):
-                pymc_rv = self.pymc_dist.dist(**self.pymc_dist_params, size=size)
-                expected_symbolic = tuple(pymc_rv.shape.eval())
-                assert expected_symbolic == expected
-
-    def test_steps_scalar_check(self):
-        with pytest.raises(ValueError, match="steps must be an integer scalar"):
-            self.pymc_dist.dist(steps=[1], init_dist=pm.DiracDelta.dist(0))
-
     def test_logp(self):
-        def ref_logp(value, mu, sigma, steps):
+        def ref_logp(value, mu, sigma):
             return (
-                st.norm.logpdf(value[0], 0, 100) + st.norm.logpdf(np.diff(value), mu, sigma).sum()
+                st.norm.logpdf(value[0], 0, 100)  # default init_dist is Normal(0, 100)
+                + st.norm.logpdf(np.diff(value), mu, sigma).sum()
             )
 
         check_logp(
             pm.GaussianRandomWalk,
             Vector(R, 4),
-            {"mu": R, "sigma": Rplus, "steps": Nat},
+            {"mu": R, "sigma": Rplus},
             ref_logp,
             decimal=select_by_precision(float64=6, float32=1),
-            extra_args={"init_dist": Normal.dist(0, 100)},
+            extra_args={"steps": 3, "init_dist": Normal.dist(0, 100)},
         )
 
     def test_gaussianrandomwalk_inference(self):
@@ -173,27 +140,27 @@ def test_gaussian_random_walk_init_dist_shape(self, init):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", "Initial distribution not specified.*", UserWarning)
             grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init_dist=init)
-            assert tuple(grw.owner.inputs[-2].shape.eval()) == ()
+            assert tuple(grw.owner.inputs[0].shape.eval()) == ()
 
         grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init_dist=init, size=(5,))
-        assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,)
+        assert tuple(grw.owner.inputs[0].shape.eval()) == (5,)
 
         grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init_dist=init, shape=2)
-        assert tuple(grw.owner.inputs[-2].shape.eval()) == ()
+        assert tuple(grw.owner.inputs[0].shape.eval()) == ()
 
         grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init_dist=init, shape=(5, 2))
-        assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,)
+        assert tuple(grw.owner.inputs[0].shape.eval()) == (5,)
 
         grw = pm.GaussianRandomWalk.dist(mu=[0, 0], sigma=1, steps=1, init_dist=init)
-        assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,)
+        assert tuple(grw.owner.inputs[0].shape.eval()) == (2,)
 
         grw = pm.GaussianRandomWalk.dist(mu=0, sigma=[1, 1], steps=1, init_dist=init)
-        assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,)
+        assert tuple(grw.owner.inputs[0].shape.eval()) == (2,)
 
         grw = pm.GaussianRandomWalk.dist(
             mu=np.zeros((3, 1)), sigma=[1, 1], steps=1, init_dist=init
         )
-        assert tuple(grw.owner.inputs[-2].shape.eval()) == (3, 2)
+        assert tuple(grw.owner.inputs[0].shape.eval()) == (3, 2)
 
     def test_gaussianrandomwalk_broadcasted_by_init_dist(self):
         grw = pm.GaussianRandomWalk.dist(

From f6c8aacf880867bd4ea6c2afb12d423c5a8aeb08 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Tue, 30 Aug 2022 11:21:45 +0200
Subject: [PATCH 12/13] Avoid reusing RNGs across distinct RandomVariables

This risk is present when resizing a RandomVariable, whose new size depends on
the size of the original RandomVariable. This can lead to wrong update
expressions for the reused RNG variables that still depended on the original
RandomVariable.

Similarly, there is a risk when clone-replacing RandomVariables, as the cloned
variables will share the same RNGs. This happened when attempting to use
Metropolis with Simulator variables.

* change_rv_size does not reuse old RNG
* compile_pymc raises if distinct update expressions are inferred for the same RNG
---
 pymc/aesaraf.py                              | 42 ++++++++++++++++++--
 pymc/distributions/shape_utils.py            | 31 +++++++++------
 pymc/distributions/simulator.py              |  4 +-
 pymc/initial_point.py                        | 16 ++------
 pymc/step_methods/metropolis.py              | 15 +++++--
 pymc/tests/distributions/test_shape_utils.py | 23 +++++++----
 pymc/tests/test_aesaraf.py                   | 37 +++++++++++++++++
 7 files changed, 127 insertions(+), 41 deletions(-)

diff --git a/pymc/aesaraf.py b/pymc/aesaraf.py
index 235c736596..f9ce65ff82 100644
--- a/pymc/aesaraf.py
+++ b/pymc/aesaraf.py
@@ -864,6 +864,31 @@ def find_rng_nodes(
     ]
 
 
+def replace_rng_nodes(outputs: Sequence[TensorVariable]) -> Sequence[TensorVariable]:
+    """Replace any RNG nodes upstream of outputs by new RNGs of the same type.
+
+    This can be used when combining a pre-existing graph with a cloned one, to ensure
+    RNGs are unique across the two graphs.
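+
+    A minimal usage sketch (variable names are illustrative):
+
+    .. code-block:: python
+
+        x = at.random.normal()
+        cloned_x = x.owner.clone().default_output()  # clone still shares x's RNG
+        (new_x,) = replace_rng_nodes([cloned_x])  # new_x now draws from a fresh RNG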
+    """
+    rng_nodes = find_rng_nodes(outputs)
+
+    # Nothing to do here
+    if not rng_nodes:
+        return outputs
+
+    graph = FunctionGraph(outputs=outputs, clone=False)
+    new_rng_nodes: List[Union[np.random.RandomState, np.random.Generator]] = []
+    for rng_node in rng_nodes:
+        rng_cls: type
+        if isinstance(rng_node, at.random.var.RandomStateSharedVariable):
+            rng_cls = np.random.RandomState
+        else:
+            rng_cls = np.random.Generator
+        new_rng_nodes.append(aesara.shared(rng_cls(np.random.PCG64())))
+    graph.replace_all(zip(rng_nodes, new_rng_nodes), import_missing=True)
+    return graph.outputs
+
+
 SeedSequenceSeed = Optional[Union[int, Sequence[int], np.ndarray, np.random.SeedSequence]]
 
 
@@ -944,12 +969,21 @@ def compile_pymc(
             assert random_var.owner.op is not None
             if isinstance(random_var.owner.op, RandomVariable):
                 rng = random_var.owner.inputs[0]
-                if not hasattr(rng, "default_update"):
-                    rng_updates[rng] = random_var.owner.outputs[0]
+                if hasattr(rng, "default_update"):
+                    update_map = {rng: rng.default_update}
                 else:
-                    rng_updates[rng] = rng.default_update
+                    update_map = {rng: random_var.owner.outputs[0]}
             else:
-                rng_updates.update(random_var.owner.op.update(random_var.owner))
+                update_map = random_var.owner.op.update(random_var.owner)
+            # Check that we are not setting different update expressions for the same variables
+            for rng, update in update_map.items():
+                if rng not in rng_updates:
+                    rng_updates[rng] = update
+                # When a variable has multiple outputs, it will be called twice with the same
+                # update expression. We don't want to raise in that case, only if the update
+                # expression is different from the one already registered
+                elif rng_updates[rng] is not update:
+                    raise ValueError(f"Multiple update expressions found for the variable {rng}")
 
     # We always reseed random variables as this provides RNGs with no chances of collision
     if rng_updates:
diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py
index 8c04aed28f..74a96ba4b1 100644
--- a/pymc/distributions/shape_utils.py
+++ b/pymc/distributions/shape_utils.py
@@ -17,6 +17,8 @@
 A collection of common shape operations needed for broadcasting
 samples from probability distributions for stochastic nodes in PyMC.
 """
+import warnings
+
 from functools import singledispatch
 from typing import Optional, Sequence, Tuple, Union
 
@@ -579,8 +581,8 @@ def change_dist_size(
     Returns
     -------
     A new distribution variable that is equivalent to the original distribution with
-    the new size. The new distribution may reuse the same RandomState/Generator inputs
-    as the original distribution.
+    the new size. The new distribution will not reuse the old RandomState/Generator
+    input, so it will be independent of the original distribution.
 
     Examples
     --------
@@ -618,24 +620,29 @@ def change_dist_size(
 def change_rv_size(op, rv, new_size, expand) -> TensorVariable:
     # Extract the RV node that is to be resized
     rv_node = rv.owner
-    rng, size, dtype, *dist_params = rv_node.inputs
+    old_rng, old_size, dtype, *dist_params = rv_node.inputs
 
     if expand:
-        shape = tuple(rv_node.op._infer_shape(size, dist_params))
-        size = shape[: len(shape) - rv_node.op.ndim_supp]
-        new_size = tuple(new_size) + tuple(size)
+        shape = tuple(rv_node.op._infer_shape(old_size, dist_params))
+        old_size = shape[: len(shape) - rv_node.op.ndim_supp]
+        new_size = tuple(new_size) + tuple(old_size)
 
     # Make sure the new size is a tensor. This dtype-aware conversion helps
     # to not unnecessarily pick up a `Cast` in some cases (see #4652).
new_size = at.as_tensor(new_size, ndim=1, dtype="int64") - new_rv_node = rv_node.op.make_node(rng, new_size, dtype, *dist_params) - new_rv = new_rv_node.outputs[-1] + new_rv = rv_node.op(*dist_params, size=new_size, dtype=dtype) - # Update "traditional" rng default_update, if that was set for old RV - default_update = getattr(rng, "default_update", None) - if default_update is not None and default_update is rv_node.outputs[0]: - rng.default_update = new_rv_node.outputs[0] + # Replicate "traditional" rng default_update, if that was set for old_rng + default_update = getattr(old_rng, "default_update", None) + if default_update is not None: + if default_update is rv_node.outputs[0]: + new_rv.owner.inputs[0].default_update = new_rv.owner.outputs[0] + else: + warnings.warn( + f"Update expression of {rv} RNG could not be replicated in resized variable", + UserWarning, + ) return new_rv diff --git a/pymc/distributions/simulator.py b/pymc/distributions/simulator.py index a1c8d5d2a3..e29979def5 100644 --- a/pymc/distributions/simulator.py +++ b/pymc/distributions/simulator.py @@ -248,10 +248,10 @@ def logp(cls, value, sim_op, sim_inputs): # TODO: Model rngs should be updated prior to multiprocessing split, # in which case this would not be needed. However, that would have to be # done for every sampler that may accomodate Simulators - rng = aesara.shared(np.random.default_rng()) + rng = aesara.shared(np.random.default_rng(), name="simulator_rng") # Create a new simulatorRV with identical inputs as the original one sim_value = sim_op.make_node(rng, *sim_inputs[1:]).default_output() - sim_value.name = "sim_value" + sim_value.name = "simulator_value" return sim_op.distance( sim_op.epsilon, diff --git a/pymc/initial_point.py b/pymc/initial_point.py index 11e80d0fa2..7b09f856b1 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -24,7 +24,7 @@ from aesara.graph.fg import FunctionGraph from aesara.tensor.var import TensorVariable -from pymc.aesaraf import compile_pymc, find_rng_nodes, reseed_rngs +from pymc.aesaraf import compile_pymc, find_rng_nodes, replace_rng_nodes, reseed_rngs from pymc.util import get_transformed_name, get_untransformed_name, is_transformed_name StartDict = Dict[Union[Variable, str], Union[np.ndarray, Variable, str]] @@ -167,18 +167,8 @@ def make_initial_point_fn( # Replace original rng shared variables so that we don't mess with them # when calling the final seeded function - graph = FunctionGraph(outputs=initial_values, clone=False) - rng_nodes = find_rng_nodes(graph.outputs) - new_rng_nodes: List[Union[np.random.RandomState, np.random.Generator]] = [] - for rng_node in rng_nodes: - rng_cls: type - if isinstance(rng_node, at.random.var.RandomStateSharedVariable): - rng_cls = np.random.RandomState - else: - rng_cls = np.random.Generator - new_rng_nodes.append(aesara.shared(rng_cls(np.random.PCG64()))) - graph.replace_all(zip(rng_nodes, new_rng_nodes), import_missing=True) - func = compile_pymc(inputs=[], outputs=graph.outputs, mode=aesara.compile.mode.FAST_COMPILE) + initial_values = replace_rng_nodes(initial_values) + func = compile_pymc(inputs=[], outputs=initial_values, mode=aesara.compile.mode.FAST_COMPILE) varnames = [] for var in model.free_RVs: diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index 418c65eb2d..e036b0f667 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -23,7 +23,14 @@ import pymc as pm -from pymc.aesaraf import compile_pymc, floatX, rvs_to_value_vars +from pymc.aesaraf import ( 
+    CallableTensor,
+    compile_pymc,
+    floatX,
+    join_nonshared_inputs,
+    replace_rng_nodes,
+    rvs_to_value_vars,
+)
 from pymc.blocking import DictToArrayBijection, RaveledVars
 from pymc.step_methods.arraystep import (
     ArrayStep,
@@ -1046,12 +1053,14 @@ def sample_except(limit, excluded):
 
 
 def delta_logp(point, logp, vars, shared):
-    [logp0], inarray0 = pm.join_nonshared_inputs(point, [logp], vars, shared)
+    [logp0], inarray0 = join_nonshared_inputs(point, [logp], vars, shared)
 
     tensor_type = inarray0.type
     inarray1 = tensor_type("inarray1")
 
-    logp1 = pm.CallableTensor(logp0)(inarray1)
+    logp1 = CallableTensor(logp0)(inarray1)
+    # Replace any potentially duplicated RNG nodes
+    (logp1,) = replace_rng_nodes((logp1,))
 
     f = compile_pymc([inarray1, inarray0], logp1 - logp0)
     f.trust_input = True
diff --git a/pymc/tests/distributions/test_shape_utils.py b/pymc/tests/distributions/test_shape_utils.py
index 825a2fa75c..8dad510a85 100644
--- a/pymc/tests/distributions/test_shape_utils.py
+++ b/pymc/tests/distributions/test_shape_utils.py
@@ -506,7 +506,8 @@ def test_rv_size_is_none():
 
 def test_change_rv_size():
     loc = at.as_tensor_variable([1, 2])
-    rv = normal(loc=loc)
+    rng = aesara.shared(np.random.default_rng())
+    rv = normal(loc=loc, rng=rng)
     assert rv.ndim == 1
     assert tuple(rv.shape.eval()) == (2,)
 
@@ -525,6 +526,9 @@ def test_change_rv_size():
     assert loc in rv_new_ancestors
     assert rv not in rv_new_ancestors
 
+    # Check that the old rng is not reused
+    assert rv_new.owner.inputs[0] is not rng
+
     rv_newer = change_dist_size(rv_new, new_size=(4,), expand=True)
     assert rv_newer.ndim == 3
     assert tuple(rv_newer.shape.eval()) == (4, 3, 2)
@@ -555,22 +559,27 @@ def test_change_rv_size_default_update():
     rng = aesara.shared(np.random.default_rng(0))
     x = normal(rng=rng)
 
-    # Test that "traditional" default_update is updated
+    # Test that "traditional" default_update is translated to the new rng
     rng.default_update = x.owner.outputs[0]
     new_x = change_dist_size(x, new_size=(2,))
-    assert rng.default_update is not x.owner.outputs[0]
-    assert rng.default_update is new_x.owner.outputs[0]
+    new_rng = new_x.owner.inputs[0]
+    assert rng.default_update is x.owner.outputs[0]
+    assert new_rng.default_update is new_x.owner.outputs[0]
 
-    # Test that "non-traditional" default_update is left unchanged
+    # Test that "non-traditional" default_update raises UserWarning
     next_rng = aesara.shared(np.random.default_rng(1))
    rng.default_update = next_rng
-    new_x = change_dist_size(x, new_size=(2,))
+    with pytest.warns(UserWarning, match="could not be replicated in resized variable"):
+        new_x = change_dist_size(x, new_size=(2,))
+    new_rng = new_x.owner.inputs[0]
     assert rng.default_update is next_rng
+    assert not hasattr(new_rng, "default_update")
 
     # Test that default_update is not set if there was none before
     del rng.default_update
     new_x = change_dist_size(x, new_size=(2,))
-    assert not hasattr(rng, "default_update")
+    new_rng = new_x.owner.inputs[0]
+    assert not hasattr(new_rng, "default_update")
 
 
 def test_change_specify_shape_size_univariate():
diff --git a/pymc/tests/test_aesaraf.py b/pymc/tests/test_aesaraf.py
index 681f0a1970..bd618372b2 100644
--- a/pymc/tests/test_aesaraf.py
+++ b/pymc/tests/test_aesaraf.py
@@ -37,6 +37,7 @@
     compile_pymc,
     convert_observed_data,
     extract_obs_data,
+    replace_rng_nodes,
     reseed_rngs,
     rvs_to_value_vars,
     walk_model,
@@ -502,6 +503,42 @@ def test_random_seed(self):
         assert x3_eval == x2_eval
         assert y3_eval == y2_eval
 
+    def test_multiple_updates_same_variable(self):
+        rng = aesara.shared(np.random.default_rng(),
name="rng") + x = at.random.normal(rng=rng) + y = at.random.normal(rng=rng) + + assert compile_pymc([], [x]) + assert compile_pymc([], [y]) + msg = "Multiple update expressions found for the variable rng" + with pytest.raises(ValueError, match=msg): + compile_pymc([], [x, y]) + + +def test_replace_rng_nodes(): + rng = aesara.shared(np.random.default_rng()) + x = at.random.normal(rng=rng) + x_rng, *x_non_rng_inputs = x.owner.inputs + + cloned_x = x.owner.clone().default_output() + cloned_x_rng, *cloned_x_non_rng_inputs = cloned_x.owner.inputs + + # RNG inputs are the same across the two variables + assert x_rng is cloned_x_rng + + (new_x,) = replace_rng_nodes([cloned_x]) + new_x_rng, *new_x_non_rng_inputs = new_x.owner.inputs + + # Variables are still the same + assert new_x is cloned_x + + # RNG inputs are not the same as before + assert new_x_rng is not x_rng + + # All other inputs are the same as before + for non_rng_inputs, new_non_rng_inputs in zip(x_non_rng_inputs, new_x_non_rng_inputs): + assert non_rng_inputs is new_non_rng_inputs + def test_reseed_rngs(): # Reseed_rngs uses the `PCG64` bit_generator, which is currently the default From 8653d18b170c922980851269fa350e2c5aaee35b Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 26 Aug 2022 17:12:50 +0200 Subject: [PATCH 13/13] Enable nested SymbolicRandomVariables --- pymc/distributions/censored.py | 4 +++- pymc/distributions/mixture.py | 2 +- pymc/distributions/multivariate.py | 2 +- pymc/distributions/timeseries.py | 6 +++--- pymc/printing.py | 14 +++++++++++--- pymc/tests/distributions/test_mixture.py | 20 +++++++++++++------- pymc/tests/test_printing.py | 22 ++++++++++++++++++++-- 7 files changed, 52 insertions(+), 18 deletions(-) diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index 4b8a072fce..ab4cdb3f67 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -86,7 +86,9 @@ class Censored(Distribution): @classmethod def dist(cls, dist, lower, upper, **kwargs): - if not isinstance(dist, TensorVariable) or not isinstance(dist.owner.op, RandomVariable): + if not isinstance(dist, TensorVariable) or not isinstance( + dist.owner.op, (RandomVariable, SymbolicRandomVariable) + ): raise ValueError( f"Censoring dist must be a distribution created via the `.dist()` API, got {type(dist)}" ) diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index bb28fb4763..d8c509fade 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -191,7 +191,7 @@ def dist(cls, w, comp_dists, **kwargs): # TODO: Allow these to not be a RandomVariable as long as we can call `ndim_supp` on them # and resize them if not isinstance(dist, TensorVariable) or not isinstance( - dist.owner.op, RandomVariable + dist.owner.op, (RandomVariable, SymbolicRandomVariable) ): raise ValueError( f"Component dist must be a distribution created via the `.dist()` API, got {type(dist)}" diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 352494e395..a2dfb9500a 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -1180,7 +1180,7 @@ def dist(cls, n, eta, sd_dist, **kwargs): if not ( isinstance(sd_dist, Variable) and sd_dist.owner is not None - and isinstance(sd_dist.owner.op, RandomVariable) + and isinstance(sd_dist.owner.op, (RandomVariable, SymbolicRandomVariable)) and sd_dist.owner.op.ndim_supp < 2 ): raise TypeError("sd_dist must be a scalar or vector distribution variable") diff --git 
a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py
index 42b3e156be..bb23cce574 100644
--- a/pymc/distributions/timeseries.py
+++ b/pymc/distributions/timeseries.py
@@ -155,7 +155,7 @@ def dist(cls, init_dist, innovation_dist, steps=None, **kwargs) -> at.TensorVari
         if not (
             isinstance(init_dist, at.TensorVariable)
             and init_dist.owner is not None
-            and isinstance(init_dist.owner.op, RandomVariable)
+            and isinstance(init_dist.owner.op, (RandomVariable, SymbolicRandomVariable))
             # TODO: Lift univariate constraint on init_dist
             and init_dist.owner.op.ndim_supp == 0
         ):
@@ -165,7 +165,7 @@ def dist(cls, init_dist, innovation_dist, steps=None, **kwargs) -> at.TensorVari
         if not (
             isinstance(innovation_dist, at.TensorVariable)
             and innovation_dist.owner is not None
-            and isinstance(innovation_dist.owner.op, RandomVariable)
+            and isinstance(innovation_dist.owner.op, (RandomVariable, SymbolicRandomVariable))
             # TODO: Lift univariate constraint on innovation_dist
             and innovation_dist.owner.op.ndim_supp == 0
         ):
@@ -434,7 +434,7 @@ def dist(
 
         if init_dist is not None:
             if not isinstance(init_dist, TensorVariable) or not isinstance(
-                init_dist.owner.op, RandomVariable
+                init_dist.owner.op, (RandomVariable, SymbolicRandomVariable)
             ):
                 raise ValueError(
                     f"Init dist must be a distribution created via the `.dist()` API, "
diff --git a/pymc/printing.py b/pymc/printing.py
index 3018e6ba53..b89d342b62 100644
--- a/pymc/printing.py
+++ b/pymc/printing.py
@@ -151,6 +151,9 @@ def str_for_potential_or_deterministic(
 
 
 def _str_for_input_var(var: Variable, formatting: str) -> str:
+    # Avoid circular import
+    from pymc.distributions.distribution import SymbolicRandomVariable
+
     def _is_potential_or_determinstic(var: Variable) -> bool:
         try:
             return var.str_repr.__func__.func is str_for_potential_or_deterministic
@@ -160,7 +163,9 @@ def _is_potential_or_determinstic(var: Variable) -> bool:
 
     if isinstance(var, TensorConstant):
         return _str_for_constant(var, formatting)
-    elif isinstance(var.owner.op, RandomVariable) or _is_potential_or_determinstic(var):
+    elif isinstance(
+        var.owner.op, (RandomVariable, SymbolicRandomVariable)
+    ) or _is_potential_or_determinstic(var):
         # show the names for RandomVariables, Deterministics, and Potentials, rather
         # than the full expression
         return _str_for_input_rv(var, formatting)
@@ -194,15 +199,18 @@ def _str_for_constant(var: TensorConstant, formatting: str) -> str:
 
 
 def _str_for_expression(var: Variable, formatting: str) -> str:
+    # Avoid circular import
+    from pymc.distributions.distribution import SymbolicRandomVariable
+
     # construct a string like f(a1, ..., aN) listing all random variables a as arguments
     def _expand(x):
-        if x.owner and (not isinstance(x.owner.op, RandomVariable)):
+        if x.owner and (not isinstance(x.owner.op, (RandomVariable, SymbolicRandomVariable))):
            return reversed(x.owner.inputs)
 
     parents = [
         x
         for x in walk(nodes=var.owner.inputs, expand=_expand)
-        if x.owner and isinstance(x.owner.op, RandomVariable)
+        if x.owner and isinstance(x.owner.op, (RandomVariable, SymbolicRandomVariable))
     ]
     names = [x.name for x in parents]
 
diff --git a/pymc/tests/distributions/test_mixture.py b/pymc/tests/distributions/test_mixture.py
index c4e173a516..ca81a4c718 100644
--- a/pymc/tests/distributions/test_mixture.py
+++ b/pymc/tests/distributions/test_mixture.py
@@ -599,13 +599,17 @@ def test_list_mvnormals_predictive_sampling_shape(self):
         assert prior["mu0"].shape == (n_samples, D)
         assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
 
-    @pytest.mark.xfail(reason="Nested mixtures not
refactored yet")
     def test_nested_mixture(self):
         if aesara.config.floatX == "float32":
             rtol = 1e-4
         else:
             rtol = 1e-7
         nbr = 4
+
+        norm_x = generate_normal_mixture_data(
+            np.r_[0.75, 0.25], np.r_[0.0, 5.0], np.r_[1.0, 1.0], size=1000
+        )
+
         with Model() as model:
             # mixtures components
             g_comp = Normal.dist(
@@ -622,7 +626,7 @@ def test_nested_mixture(self):
             l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
             # mixture of mixtures
             mix_w = Dirichlet("mix_w", a=floatX(np.ones(2)), transform=None, shape=(2,))
-            mix = Mixture("mix", w=mix_w, comp_dists=[g_mix, l_mix], observed=np.exp(self.norm_x))
+            mix = Mixture("mix", w=mix_w, comp_dists=[g_mix, l_mix], observed=np.exp(norm_x))
 
         test_point = model.initial_point()
 
@@ -658,21 +662,23 @@ def mixmixlogp(value, point):
             )
             return priorlogp, mixmixlogpg
 
-        value = np.exp(self.norm_x)[:, None]
+        value = np.exp(norm_x)[:, None]
         priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
 
         # check logp of mixture
-        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)
+        mix_logp_fn = model.compile_logp(vars=[mix], sum=False)
+        assert_allclose(mixmixlogpg, mix_logp_fn(test_point)[0], rtol=rtol)
 
         # check model logp
-        assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol)
+        model_logp_fn = model.compile_logp()
+        assert_allclose(priorlogp + mixmixlogpg.sum(), model_logp_fn(test_point), rtol=rtol)
 
         # check input and check logp again
         test_point["g_w"] = np.asarray([0.1, 0.1, 0.2, 0.6])
         test_point["mu_g"] = np.exp(np.random.randn(nbr))
         priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
-        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol)
-        assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol)
+        assert_allclose(mixmixlogpg, mix_logp_fn(test_point)[0], rtol=rtol)
+        assert_allclose(priorlogp + mixmixlogpg.sum(), model_logp_fn(test_point), rtol=rtol)
 
     def test_iterable_single_component_warning(self):
         with warnings.catch_warnings():
diff --git a/pymc/tests/test_printing.py b/pymc/tests/test_printing.py
index 6ef08bdf13..8ebaba564a 100644
--- a/pymc/tests/test_printing.py
+++ b/pymc/tests/test_printing.py
@@ -1,5 +1,6 @@
 import numpy as np
 
+from pymc import Bernoulli, Censored, Mixture
 from pymc.aesaraf import floatX
 from pymc.distributions import (
     DirichletMultinomial,
@@ -48,9 +49,14 @@ def setup_class(self):
         #     )
         nb2 = NegativeBinomial("nb_with_p_n", p=Uniform("nbp"), n=10)
 
-        # Symbolic distribution
+        # SymbolicRV
        zip = ZeroInflatedPoisson("zip", 0.5, 5)
 
+        # Nested SymbolicRV
+        comp_1 = ZeroInflatedPoisson.dist(0.5, 5)
+        comp_2 = Censored.dist(Bernoulli.dist(0.5), -1, 1)
+        nested_mix = Mixture("nested_mix", [0.5, 0.5], [comp_1, comp_2])
+
         # Expected value of outcome
         mu = Deterministic("mu", floatX(alpha + dot(X, b)))
 
@@ -80,7 +86,7 @@ def setup_class(self):
         # add a potential as well
         pot = Potential("pot", mu**2)
 
-        self.distributions = [alpha, sigma, mu, b, Z, nb2, zip, Y_obs, pot]
+        self.distributions = [alpha, sigma, mu, b, Z, nb2, zip, nested_mix, Y_obs, pot]
         self.deterministics_or_potentials = [mu, pot]
         # tuples of (formatting, include_params
         self.formats = [("plain", True), ("plain", False), ("latex", True), ("latex", False)]
@@ -93,6 +99,11 @@ def setup_class(self):
                 r"Z ~ N(f(), f())",
                 r"nb_with_p_n ~ NB(10, nbp)",
                 r"zip ~ MarginalMixture(f(), DiracDelta(0), Pois(5))",
+                (
+                    r"nested_mix ~ MarginalMixture(<constant>, "
+                    r"MarginalMixture(f(), DiracDelta(0), Pois(5)), "
+                    r"Censored(Bern(0.5), -1, 1))"
+                ),
                 r"Y_obs ~ N(mu, sigma)",
                 r"pot ~ Potential(f(beta, alpha))",
             ],
@@ -104,6 +115,7 @@ def setup_class(self):
                 r"Z ~ N",
                 r"nb_with_p_n ~ NB",
                 r"zip ~ MarginalMixture",
+                r"nested_mix ~ MarginalMixture",
                 r"Y_obs ~ N",
                 r"pot ~ Potential",
             ],
@@ -115,6 +127,11 @@ def setup_class(self):
                 r"$\text{Z} \sim \operatorname{N}(f(),~f())$",
                 r"$\text{nb_with_p_n} \sim \operatorname{NB}(10,~\text{nbp})$",
                 r"$\text{zip} \sim \operatorname{MarginalMixture}(f(),~\text{\$\operatorname{DiracDelta}(0)\$},~\text{\$\operatorname{Pois}(5)\$})$",
+                (
+                    r"$\text{nested_mix} \sim \operatorname{MarginalMixture}(\text{<constant>},"
+                    r"~\text{\$\operatorname{MarginalMixture}(f(),~\text{\\$\operatorname{DiracDelta}(0)\\$},~\text{\\$\operatorname{Pois}(5)\\$})\$},"
+                    r"~\text{\$\operatorname{Censored}(\text{\\$\operatorname{Bern}(0.5)\\$},~-1,~1)\$})$"
+                ),
                 r"$\text{Y_obs} \sim \operatorname{N}(\text{mu},~\text{sigma})$",
                 r"$\text{pot} \sim \operatorname{Potential}(f(\text{beta},~\text{alpha}))$",
             ],
@@ -126,6 +143,7 @@ def setup_class(self):
                 r"$\text{Z} \sim \operatorname{N}$",
                 r"$\text{nb_with_p_n} \sim \operatorname{NB}$",
                 r"$\text{zip} \sim \operatorname{MarginalMixture}$",
+                r"$\text{nested_mix} \sim \operatorname{MarginalMixture}$",
                 r"$\text{Y_obs} \sim \operatorname{N}$",
                 r"$\text{pot} \sim \operatorname{Potential}$",
             ],
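+            # The nested_mix expectations above are produced by composing
+            # SymbolicRVs, which this patch enables. A minimal sketch of the
+            # pattern (mirrors setup_class above; names are illustrative):
+            #
+            #   comp_1 = ZeroInflatedPoisson.dist(0.5, 5)
+            #   comp_2 = Censored.dist(Bernoulli.dist(0.5), -1, 1)
+            #   nested_mix = Mixture("nested_mix", [0.5, 0.5], [comp_1, comp_2])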