diff --git a/docs/source/api/distributions/utilities.rst b/docs/source/api/distributions/utilities.rst index a6ba09b059..bee4250714 100644 --- a/docs/source/api/distributions/utilities.rst +++ b/docs/source/api/distributions/utilities.rst @@ -7,8 +7,8 @@ Distribution utilities :toctree: generated/ Distribution - SymbolicDistribution Discrete Continuous NoDistribution DensityDist + SymbolicRandomVariable diff --git a/docs/source/api/shape_utils.rst b/docs/source/api/shape_utils.rst index 29485dd4fb..97a2280012 100644 --- a/docs/source/api/shape_utils.rst +++ b/docs/source/api/shape_utils.rst @@ -20,3 +20,4 @@ This module introduces functions that are made aware of the requested `size_tupl broadcast_distribution_samples broadcast_dist_samples_to rv_size_is_none + change_dist_size diff --git a/pymc/aesaraf.py b/pymc/aesaraf.py index c8adb276f8..f9ce65ff82 100644 --- a/pymc/aesaraf.py +++ b/pymc/aesaraf.py @@ -32,9 +32,8 @@ import pandas as pd import scipy.sparse as sps -from aeppl.abstract import MeasurableVariable from aeppl.logprob import CheckParameterValue -from aesara import config, scalar +from aesara import scalar from aesara.compile.mode import Mode, get_mode from aesara.gradient import grad from aesara.graph import node_rewriter @@ -48,7 +47,7 @@ walk, ) from aesara.graph.fg import FunctionGraph -from aesara.graph.op import Op, compute_test_value +from aesara.graph.op import Op from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream from aesara.scalar.basic import Cast from aesara.tensor.basic import _as_tensor_variable @@ -58,12 +57,10 @@ RandomGeneratorSharedVariable, RandomStateSharedVariable, ) -from aesara.tensor.shape import SpecifyShape from aesara.tensor.sharedvar import SharedVariable from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from aesara.tensor.var import TensorConstant, TensorVariable -from pymc.exceptions import ShapeError from pymc.vartypes import continuous_types, isgenerator, typefilter PotentialShapeType = Union[int, np.ndarray, Sequence[Union[int, Variable]], TensorVariable] @@ -150,65 +147,6 @@ def dataframe_to_tensor_variable(df: pd.DataFrame, *args, **kwargs) -> TensorVar return at.as_tensor_variable(df.to_numpy(), *args, **kwargs) -def change_rv_size( - rv: TensorVariable, - new_size: PotentialShapeType, - expand: Optional[bool] = False, -) -> TensorVariable: - """Change or expand the size of a `RandomVariable`. - - Parameters - ========== - rv - The old `RandomVariable` output. - new_size - The new size. - expand: - Expand the existing size by `new_size`. - - """ - # Check the dimensionality of the `new_size` kwarg - new_size_ndim = np.ndim(new_size) - if new_size_ndim > 1: - raise ShapeError("The `new_size` must be ≤1-dimensional.", actual=new_size_ndim) - elif new_size_ndim == 0: - new_size = (new_size,) - - # Extract the RV node that is to be resized, together with its inputs, name and tag - assert rv.owner.op is not None - if isinstance(rv.owner.op, SpecifyShape): - rv = rv.owner.inputs[0] - rv_node = rv.owner - rng, size, dtype, *dist_params = rv_node.inputs - name = rv.name - tag = rv.tag - - if expand: - shape = tuple(rv_node.op._infer_shape(size, dist_params)) - size = shape[: len(shape) - rv_node.op.ndim_supp] - new_size = tuple(new_size) + tuple(size) - - # Make sure the new size is a tensor. This dtype-aware conversion helps - # to not unnecessarily pick up a `Cast` in some cases (see #4652). 
- new_size = at.as_tensor(new_size, ndim=1, dtype="int64") - - new_rv_node = rv_node.op.make_node(rng, new_size, dtype, *dist_params) - new_rv = new_rv_node.outputs[-1] - new_rv.name = name - for k, v in tag.__dict__.items(): - new_rv.tag.__dict__.setdefault(k, v) - - # Update "traditional" rng default_update, if that was set for old RV - default_update = getattr(rng, "default_update", None) - if default_update is not None and default_update is rv_node.outputs[0]: - rng.default_update = new_rv_node.outputs[0] - - if config.compute_test_value != "off": - compute_test_value(new_rv_node) - - return new_rv - - def extract_rv_and_value_vars( var: TensorVariable, ) -> Tuple[TensorVariable, TensorVariable]: @@ -926,6 +864,31 @@ def find_rng_nodes( ] +def replace_rng_nodes(outputs: Sequence[TensorVariable]) -> Sequence[TensorVariable]: + """Replace any RNG nodes upsteram of outputs by new RNGs of the same type + + This can be used when combining a pre-existing graph with a cloned one, to ensure + RNGs are unique across the two graphs. + """ + rng_nodes = find_rng_nodes(outputs) + + # Nothing to do here + if not rng_nodes: + return outputs + + graph = FunctionGraph(outputs=outputs, clone=False) + new_rng_nodes: List[Union[np.random.RandomState, np.random.Generator]] = [] + for rng_node in rng_nodes: + rng_cls: type + if isinstance(rng_node, at.random.var.RandomStateSharedVariable): + rng_cls = np.random.RandomState + else: + rng_cls = np.random.Generator + new_rng_nodes.append(aesara.shared(rng_cls(np.random.PCG64()))) + graph.replace_all(zip(rng_nodes, new_rng_nodes), import_missing=True) + return graph.outputs + + SeedSequenceSeed = Optional[Union[int, Sequence[int], np.ndarray, np.random.SeedSequence]] @@ -987,6 +950,9 @@ def compile_pymc( this function is called within a model context and the model `check_bounds` flag is set to False. """ + # Avoid circular import + from pymc.distributions.distribution import SymbolicRandomVariable + # Create an update mapping of RandomVariable's RNG so that it is automatically # updated after every function call rng_updates = {} @@ -995,7 +961,7 @@ def compile_pymc( var for var in vars_between(inputs, output_to_list) if var.owner - and isinstance(var.owner.op, (RandomVariable, MeasurableVariable)) + and isinstance(var.owner.op, (RandomVariable, SymbolicRandomVariable)) and var not in inputs ): # All nodes in `vars_between(inputs, outputs)` have owners. @@ -1003,14 +969,21 @@ def compile_pymc( assert random_var.owner.op is not None if isinstance(random_var.owner.op, RandomVariable): rng = random_var.owner.inputs[0] - if not hasattr(rng, "default_update"): - rng_updates[rng] = random_var.owner.outputs[0] + if hasattr(rng, "default_update"): + update_map = {rng: rng.default_update} else: - rng_updates[rng] = rng.default_update + update_map = {rng: random_var.owner.outputs[0]} else: - update_fn = getattr(random_var.owner.op, "update", None) - if update_fn is not None: - rng_updates.update(update_fn(random_var.owner)) + update_map = random_var.owner.op.update(random_var.owner) + # Check that we are not setting different update expressions for the same variables + for rng, update in update_map.items(): + if rng not in rng_updates: + rng_updates[rng] = update + # When a variable has multiple outputs, it will be called twice with the same + # update expression. 
We don't want to raise in that case, only if the update + # expression in different from the one already registered + elif rng_updates[rng] is not update: + raise ValueError(f"Multiple update expressions found for the variable {rng}") # We always reseed random variables as this provides RNGs with no chances of collision if rng_updates: diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index 49a5338302..c4128b4920 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -82,7 +82,7 @@ Discrete, Distribution, NoDistribution, - SymbolicDistribution, + SymbolicRandomVariable, ) from pymc.distributions.mixture import Mixture, NormalMixture from pymc.distributions.multivariate import ( @@ -105,9 +105,11 @@ from pymc.distributions.timeseries import ( AR, GARCH11, + EulerMaruyama, GaussianRandomWalk, MvGaussianRandomWalk, MvStudentTRandomWalk, + RandomWalk, ) __all__ = [ @@ -154,7 +156,7 @@ "OrderedProbit", "DensityDist", "Distribution", - "SymbolicDistribution", + "SymbolicRandomVariable", "Continuous", "Discrete", "NoDistribution", @@ -171,11 +173,13 @@ "WishartBartlett", "LKJCholeskyCov", "LKJCorr", - "AR", "AsymmetricLaplace", + "RandomWalk", "GaussianRandomWalk", "MvGaussianRandomWalk", "MvStudentTRandomWalk", + "AR", + "EulerMaruyama", "GARCH11", "SkewNormal", "Mixture", diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index c38681d79f..ab4cdb3f67 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -14,16 +14,26 @@ import aesara.tensor as at import numpy as np -from aesara.scalar import Clip from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable -from pymc.aesaraf import change_rv_size -from pymc.distributions.distribution import SymbolicDistribution, _moment +from pymc.distributions.distribution import ( + Distribution, + SymbolicRandomVariable, + _moment, +) +from pymc.distributions.shape_utils import _change_dist_size, change_dist_size from pymc.util import check_dist_not_registered -class Censored(SymbolicDistribution): +class CensoredRV(SymbolicRandomVariable): + """Censored random variable""" + + inline_aeppl = True + _print_name = ("Censored", "\\operatorname{Censored}") + + +class Censored(Distribution): r""" Censored distribution @@ -72,9 +82,13 @@ class Censored(SymbolicDistribution): censored_normal = pm.Censored("censored_normal", normal_dist, lower=-1, upper=1) """ + rv_type = CensoredRV + @classmethod def dist(cls, dist, lower, upper, **kwargs): - if not isinstance(dist, TensorVariable) or not isinstance(dist.owner.op, RandomVariable): + if not isinstance(dist, TensorVariable) or not isinstance( + dist.owner.op, (RandomVariable, SymbolicRandomVariable) + ): raise ValueError( f"Censoring dist must be a distribution created via the `.dist()` API, got {type(dist)}" ) @@ -85,10 +99,6 @@ def dist(cls, dist, lower, upper, **kwargs): check_dist_not_registered(dist) return super().dist([dist, lower, upper], **kwargs) - @classmethod - def ndim_supp(cls, *dist_params): - return 0 - @classmethod def rv_op(cls, dist, lower=None, upper=None, size=None): @@ -97,29 +107,28 @@ def rv_op(cls, dist, lower=None, upper=None, size=None): # When size is not specified, dist may have to be broadcasted according to lower/upper dist_shape = size if size is not None else at.broadcast_shape(dist, lower, upper) - dist = change_rv_size(dist, dist_shape) + dist = change_dist_size(dist, dist_shape) # Censoring is achieved by clipping the base distribution 
between lower and upper - rv_out = at.clip(dist, lower, upper) + dist_, lower_, upper_ = dist.type(), lower.type(), upper.type() + censored_rv_ = at.clip(dist_, lower_, upper_) - # Reference nodes to facilitate identification in other classmethods, without - # worring about possible dimshuffles - rv_out.tag.dist = dist - rv_out.tag.lower = lower - rv_out.tag.upper = upper + return CensoredRV( + inputs=[dist_, lower_, upper_], + outputs=[censored_rv_], + ndim_supp=0, + )(dist, lower, upper) - return rv_out - @classmethod - def change_size(cls, rv, new_size, expand=False): - dist = rv.tag.dist - lower = rv.tag.lower - upper = rv.tag.upper - new_dist = change_rv_size(dist, new_size, expand=expand) - return cls.rv_op(new_dist, lower, upper) +@_change_dist_size.register(CensoredRV) +def change_censored_size(cls, dist, new_size, expand=False): + uncensored_dist, lower, upper = dist.owner.inputs + if expand: + new_size = tuple(new_size) + tuple(uncensored_dist.shape) + return Censored.rv_op(uncensored_dist, lower, upper, size=new_size) -@_moment.register(Clip) +@_moment.register(CensoredRV) def moment_censored(op, rv, dist, lower, upper): moment = at.switch( at.eq(lower, -np.inf), diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index d0b6747662..7654c5e58c 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -24,29 +24,35 @@ import aesara import numpy as np +from aeppl.abstract import MeasurableVariable, _get_measurable_outputs from aeppl.logprob import _logcdf, _logprob +from aeppl.rewriting import logprob_rewrites_db from aesara import tensor as at -from aesara.graph.basic import Variable +from aesara.compile.builders import OpFromGraph +from aesara.graph import node_rewriter +from aesara.graph.basic import Node, Variable, clone_replace +from aesara.graph.rewriting.basic import in2out +from aesara.graph.utils import MetaType from aesara.tensor.basic import as_tensor_variable -from aesara.tensor.elemwise import Elemwise from aesara.tensor.random.op import RandomVariable +from aesara.tensor.random.type import RandomType from aesara.tensor.var import TensorVariable from typing_extensions import TypeAlias -from pymc.aesaraf import change_rv_size, convert_observed_data +from pymc.aesaraf import convert_observed_data from pymc.distributions.shape_utils import ( Dims, Shape, - Size, StrongDims, StrongShape, + change_dist_size, convert_dims, convert_shape, convert_size, find_size, shape_from_dims, ) -from pymc.printing import str_for_dist, str_for_symbolic_dist +from pymc.printing import str_for_dist from pymc.util import UNSET from pymc.vartypes import string_types @@ -54,10 +60,10 @@ "DensityDistRV", "DensityDist", "Distribution", - "SymbolicDistribution", "Continuous", "Discrete", "NoDistribution", + "SymbolicRandomVariable", ] DIST_PARAMETER_TYPES: TypeAlias = Union[np.ndarray, int, float, TensorVariable] @@ -105,6 +111,7 @@ def _random(*args, **kwargs): if isinstance(rv_op, RandomVariable): rv_type = type(rv_op) + clsdict["rv_type"] = rv_type new_cls = super().__new__(cls, name, bases, clsdict) @@ -178,11 +185,55 @@ def _make_rv_and_resize_shape_from_dims( return rv_out, resize_shape_from_dims +class SymbolicRandomVariable(OpFromGraph): + """Symbolic Random Variable + + This is a subclasse of `OpFromGraph` which is used to encapsulate the symbolic + random graph of complex distributions which are built on top of pure + `RandomVariable`s. 
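# --- Illustrative sketch, not part of this diff ---
# The `compile_pymc` changes above collect RNG updates from both `RandomVariable` and
# `SymbolicRandomVariable` nodes (via the RNG's `default_update` or `op.update(node)`),
# so compiled functions advance their RNGs between calls. A minimal sketch of the
# user-visible effect, assuming the `pymc.aesaraf.compile_pymc` API shown in this diff:
import pymc as pm
from pymc.aesaraf import compile_pymc

x = pm.Normal.dist()
fn = compile_pymc(inputs=[], outputs=x)
# The RNG update was registered automatically, so repeated calls return fresh draws
assert fn() != fn()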
+ + These graphs may vary structurally based on the inputs (e.g., their dimensionality), + and usually require that random inputs have specific shapes for correct outputs + (e.g., avoiding broadcasting of random inputs). Due to this, most distributions that + return SymbolicRandomVariable create their these graphs at runtime via the + classmethod `cls.rv_op`, taking care to clone and resize random inputs, if needed. + """ + + ndim_supp: int = None + """Number of support dimensions as in RandomVariables + (0 for scalar, 1 for vector, ...) + """ + + inline_aeppl: bool = False + """Specifies whether the logprob function is derived automatically by introspection + of the inner graph. + + If `False`, a logprob function must be dispatched directly to the subclass type. + """ + + _print_name: Tuple[str, str] = ("Unknown", "\\operatorname{Unknown}") + """Tuple of (name, latex name) used for for pretty-printing variables of this type""" + + def __init__(self, *args, ndim_supp, **kwargs): + self.ndim_supp = ndim_supp + kwargs.setdefault("inline", True) + super().__init__(*args, **kwargs) + + def update(self, node: Node): + """Symbolic update expression for input random state variables + + Returns a dictionary with the symbolic expressions required for correct updating + of random state input variables repeated function evaluations. This is used by + `aesaraf.compile_pymc`. + """ + return {} + + class Distribution(metaclass=DistributionMeta): """Statistical distribution""" - rv_class = None - rv_op: RandomVariable = None + rv_op: [RandomVariable, SymbolicRandomVariable] = None + rv_type: MetaType = None def __new__( cls, @@ -270,9 +321,9 @@ def __new__( # Resize variable based on `dims` information if resize_shape_from_dims: resize_size_from_dims = find_size( - shape=resize_shape_from_dims, size=None, ndim_supp=cls.rv_op.ndim_supp + shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.owner.op.ndim_supp ) - rv_out = change_rv_size(rv=rv_out, new_size=resize_size_from_dims, expand=False) + rv_out = change_dist_size(dist=rv_out, new_size=resize_size_from_dims, expand=False) rv_out = model.register_rv( rv_out, @@ -346,7 +397,11 @@ def dist( shape = convert_shape(shape) size = convert_size(size) - create_size = find_size(shape=shape, size=size, ndim_supp=cls.rv_op.ndim_supp) + # SymbolicRVs don't have `ndim_supp` until they are created + ndim_supp = getattr(cls.rv_op, "ndim_supp", None) + if ndim_supp is None: + ndim_supp = cls.rv_op(*dist_params, **kwargs).owner.op.ndim_supp + create_size = find_size(shape=shape, size=size, ndim_supp=ndim_supp) rv_out = cls.rv_op(*dist_params, size=create_size, **kwargs) rv_out.logp = _make_nice_attr_error("rv.logp(x)", "pm.logp(rv, x)") @@ -355,205 +410,34 @@ def dist( return rv_out -class SymbolicDistribution: - """Symbolic statistical distribution - - While traditional PyMC distributions are represented by a single RandomVariable - graph, Symbolic distributions correspond to a larger graph that contains one or - more RandomVariables and an arbitrary number of deterministic operations, which - represent their own kind of distribution. - - The graphs returned by symbolic distributions can be evaluated directly to - obtain valid draws and can further be parsed by Aeppl to derive the - corresponding logp at runtime. - - Check pymc.distributions.Censored for an example of a symbolic distribution. 
- - Symbolic distributions must implement the following classmethods: - cls.dist - Performs input validation and converts optional alternative parametrizations - to a canonical parametrization. It should call `super().dist()`, passing a - list with the default parameters as the first and only non keyword argument, - followed by other keyword arguments like size and rngs, and return the result - cls.ndim_supp - Returns the support of the symbolic distribution, given the default set of - parameters. This may not always be constant, for instance if the symbolic - distribution can be defined based on an arbitrary base distribution. - cls.rv_op - Returns a TensorVariable that represents the symbolic distribution - parametrized by a default set of parameters and a size and rngs arguments - cls.change_size - Returns an equivalent symbolic distribution with a different size. This is - analogous to `pymc.aesaraf.change_rv_size` for `RandomVariable`s. - """ - - def __new__( - cls, - name: str, - *args, - dims: Optional[Dims] = None, - initval=None, - observed=None, - total_size=None, - transform=UNSET, - **kwargs, - ) -> TensorVariable: - """Adds a TensorVariable corresponding to a PyMC symbolic distribution to the - current model. - - Parameters - ---------- - cls : type - A distribution class that inherits from SymbolicDistribution. - name : str - Name for the new model variable. - dims : tuple, optional - A tuple of dimension names known to the model. When shape is not provided, - the shape of dims is used to define the shape of the variable. - initval : optional - Numeric or symbolic untransformed initial value of matching shape, - or one of the following initial value strategies: "moment", "prior". - Depending on the sampler's settings, a random jitter may be added to numeric, - symbolic or moment-based initial values in the transformed space. - observed : optional - Observed data to be passed when registering the random variable in the model. - When neither shape nor dims is provided, the shape of observed is used to - define the shape of the variable. - See ``Model.register_rv``. - total_size : float, optional - See ``Model.register_rv``. - transform : optional - See ``Model.register_rv``. - **kwargs - Keyword arguments that will be forwarded to ``.dist()``. - Most prominently: ``shape`` and ``size`` - - Returns - ------- - var : TensorVariable - The created variable, registered in the Model. - """ - - try: - from pymc.model import Model - - model = Model.get_context() - except TypeError: - raise TypeError( - "No model on context stack, which is needed to " - "instantiate distributions. Add variable inside " - "a 'with model:' block, or use the '.dist' syntax " - "for a standalone distribution." 
- ) - - if "testval" in kwargs: - initval = kwargs.pop("testval") - warnings.warn( - "The `testval` argument is deprecated; use `initval`.", - FutureWarning, - stacklevel=2, - ) - - if not isinstance(name, string_types): - raise TypeError(f"Name needs to be a string but got: {name}") - - dims = convert_dims(dims) - if observed is not None: - observed = convert_observed_data(observed) - - # Create the RV, without taking `dims` into consideration - rv_out, resize_shape_from_dims = _make_rv_and_resize_shape_from_dims( - cls=cls, dims=dims, model=model, observed=observed, args=args, **kwargs - ) - - # Resize variable based on `dims` information - if resize_shape_from_dims: - resize_size_from_dims = find_size( - shape=resize_shape_from_dims, size=None, ndim_supp=rv_out.tag.ndim_supp - ) - rv_out = cls.change_size(rv=rv_out, new_size=resize_size_from_dims, expand=False) - - rv_out = model.register_rv( - rv_out, - name, - observed, - total_size, - dims=dims, - transform=transform, - initval=initval, - ) - # add in pretty-printing support - rv_out.str_repr = types.MethodType(str_for_symbolic_dist, rv_out) - rv_out._repr_latex_ = types.MethodType( - functools.partial(str_for_symbolic_dist, formatting="latex"), rv_out - ) - - rv_out.logp = _make_nice_attr_error("rv.logp(x)", "pm.logp(rv, x)") - rv_out.logcdf = _make_nice_attr_error("rv.logcdf(x)", "pm.logcdf(rv, x)") - rv_out.random = _make_nice_attr_error("rv.random()", "pm.draw(rv)") - - return rv_out - - @classmethod - def dist( - cls, - dist_params, - *, - shape: Optional[Shape] = None, - size: Optional[Size] = None, - **kwargs, - ) -> TensorVariable: - """Creates a TensorVariable corresponding to the `cls` symbolic distribution. - - Parameters - ---------- - dist_params : array-like - The inputs to the `RandomVariable` `Op`. - shape : int, tuple, Variable, optional - A tuple of sizes for each dimension of the new RV. - size : int, tuple, Variable, optional - For creating the RV like in Aesara/NumPy. +# Let Aeppl know that the SymbolicRandomVariable has a logprob. +MeasurableVariable.register(SymbolicRandomVariable) - Returns - ------- - var : TensorVariable - """ - if "testval" in kwargs: - kwargs.pop("testval") - warnings.warn( - "The `.dist(testval=...)` argument is deprecated and has no effect. " - "Initial values for sampling/optimization can be specified with `initval` in a modelcontext. " - "For using Aesara's test value features, you must assign the `.tag.test_value` yourself.", - FutureWarning, - stacklevel=2, - ) - if "initval" in kwargs: - raise TypeError( - "Unexpected keyword argument `initval`. " - "This argument is not available for the `.dist()` API." - ) +@_get_measurable_outputs.register(SymbolicRandomVariable) +def _get_measurable_outputs_symbolic_random_variable(op, node): + # This tells Aeppl that any non RandomType outputs are measurable + return [out for out in node.outputs if not isinstance(out.type, RandomType)] - if "dims" in kwargs: - raise NotImplementedError("The use of a `.dist(dims=...)` API is not supported.") - if shape is not None and size is not None: - raise ValueError( - f"Passing both `shape` ({shape}) and `size` ({size}) is not supported!" - ) - shape = convert_shape(shape) - size = convert_size(size) +@node_rewriter([SymbolicRandomVariable]) +def inline_symbolic_random_variable(fgraph, node): + """ + This optimization expands the internal graph of a SymbolicRV when obtaining logp + from Aeppl, if the flag `inline_aeppl` is True. 
+ """ + op = node.op + if op.inline_aeppl: + return clone_replace(op.inner_outputs, {u: v for u, v in zip(op.inner_inputs, node.inputs)}) - ndim_supp = cls.ndim_supp(*dist_params) - create_size = find_size(shape=shape, size=size, ndim_supp=ndim_supp) - rv_out = cls.rv_op(*dist_params, size=create_size, **kwargs) - # This is needed for resizing from dims in `__new__` - rv_out.tag.ndim_supp = ndim_supp - rv_out.logp = _make_nice_attr_error("rv.logp(x)", "pm.logp(rv, x)") - rv_out.logcdf = _make_nice_attr_error("rv.logcdf(x)", "pm.logcdf(rv, x)") - rv_out.random = _make_nice_attr_error("rv.random()", "pm.draw(rv)") - return rv_out +# Registered before pre-canonicalization which happens at position=-10 +logprob_rewrites_db.register( + "inline_SymbolicRandomVariable", + in2out(inline_symbolic_random_variable), + "basic", + position=-20, +) @singledispatch @@ -571,12 +455,6 @@ def moment(rv: TensorVariable) -> TensorVariable: return _moment(rv.owner.op, rv, *rv.owner.inputs).astype(rv.dtype) -@_moment.register(Elemwise) -def moment_elemwise(op, rv, *dist_params): - """For Elemwise Ops, dispatch on respective scalar_op""" - return _moment(op.scalar_op, rv, *dist_params) - - class Discrete(Distribution): """Base class for discrete distributions""" diff --git a/pymc/distributions/logprob.py b/pymc/distributions/logprob.py index a75d2b973d..7e0114a8cc 100644 --- a/pymc/distributions/logprob.py +++ b/pymc/distributions/logprob.py @@ -17,16 +17,20 @@ from typing import Dict, List, Optional, Sequence, Union import aesara -import aesara.tensor as at import numpy as np -from aeppl import factorized_joint_logprob +from aeppl import factorized_joint_logprob, logprob from aeppl.abstract import assign_custom_measurable_outputs +from aeppl.logprob import _logprob from aeppl.logprob import logcdf as logcdf_aeppl from aeppl.logprob import logprob as logp_aeppl +from aeppl.tensor import MeasurableJoin from aeppl.transforms import TransformValuesRewrite +from aesara import tensor as at +from aesara.graph import FunctionGraph, rewrite_graph from aesara.graph.basic import graph_inputs, io_toposort from aesara.tensor.random.op import RandomVariable +from aesara.tensor.rewriting.basic import ShapeFeature, topo_constant_folding from aesara.tensor.subtensor import ( AdvancedIncSubtensor, AdvancedIncSubtensor1, @@ -320,3 +324,51 @@ def ignore_logprob(rv: TensorVariable) -> TensorVariable: return rv new_node = assign_custom_measurable_outputs(node, type_prefix=prefix) return new_node.outputs[node.outputs.index(rv)] + + +@_logprob.register(MeasurableJoin) +def logprob_join_constant_shapes(op, values, axis, *base_vars, **kwargs): + """Compute the log-likelihood graph for a `Join`. 
+ + This overrides the implementation in Aeppl, to constant fold the shapes + of the base vars so that RandomVariables do not show up in the logp graph, + which is a requirement enforced by `pymc.distributions.logprob.joint_logp` + """ + (value,) = values + + base_var_shapes = [base_var.shape[axis] for base_var in base_vars] + + shape_fg = FunctionGraph( + outputs=base_var_shapes, + features=[ShapeFeature()], + clone=True, + ) + base_var_shapes = rewrite_graph(shape_fg, custom_opt=topo_constant_folding).outputs + + split_values = at.split( + value, + splits_size=[base_var_shape for base_var_shape in base_var_shapes], + n_splits=len(base_vars), + axis=axis, + ) + + logps = [ + logprob(base_var, split_value) for base_var, split_value in zip(base_vars, split_values) + ] + + if len({logp.ndim for logp in logps}) != 1: + raise ValueError( + "Joined logps have different number of dimensions, this can happen when " + "joining univariate and multivariate distributions", + ) + + base_vars_ndim_supp = split_values[0].ndim - logps[0].ndim + join_logprob = at.concatenate( + [ + at.atleast_1d(logprob(base_var, split_value)) + for base_var, split_value in zip(base_vars, split_values) + ], + axis=axis - base_vars_ndim_supp, + ) + + return join_logprob diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index 3f71f537c5..d8c509fade 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -17,21 +17,23 @@ import aesara.tensor as at import numpy as np -from aeppl.abstract import MeasurableVariable, _get_measurable_outputs from aeppl.logprob import _logcdf, _logprob from aeppl.transforms import IntervalTransform -from aesara.compile.builders import OpFromGraph from aesara.graph.basic import Node, equal_computations from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable -from pymc.aesaraf import change_rv_size from pymc.distributions import transforms from pymc.distributions.continuous import Normal, get_tau_sigma from pymc.distributions.dist_math import check_parameters -from pymc.distributions.distribution import SymbolicDistribution, _moment, moment +from pymc.distributions.distribution import ( + Distribution, + SymbolicRandomVariable, + _moment, + moment, +) from pymc.distributions.logprob import ignore_logprob, logcdf, logp -from pymc.distributions.shape_utils import to_tuple +from pymc.distributions.shape_utils import _change_dist_size, change_dist_size from pymc.distributions.transforms import _default_transform from pymc.util import check_dist_not_registered from pymc.vartypes import continuous_types, discrete_types @@ -39,20 +41,18 @@ __all__ = ["Mixture", "NormalMixture"] -class MarginalMixtureRV(OpFromGraph): +class MarginalMixtureRV(SymbolicRandomVariable): """A placeholder used to specify a log-likelihood for a mixture sub-graph.""" default_output = 1 + _print_name = ("MarginalMixture", "\\operatorname{MarginalMixture}") def update(self, node: Node): # Update for the internal mix_indexes RV return {node.inputs[0]: node.outputs[0]} -MeasurableVariable.register(MarginalMixtureRV) - - -class Mixture(SymbolicDistribution): +class Mixture(Distribution): R""" Mixture log-likelihood @@ -161,6 +161,8 @@ class Mixture(SymbolicDistribution): like = pm.Mixture('like', w=w, comp_dists=components, observed=data) """ + rv_type = MarginalMixtureRV + @classmethod def dist(cls, w, comp_dists, **kwargs): if not isinstance(comp_dists, (tuple, list)): @@ -189,7 +191,7 @@ def dist(cls, w, comp_dists, **kwargs): # TODO: Allow these 
to not be a RandomVariable as long as we can call `ndim_supp` on them # and resize them if not isinstance(dist, TensorVariable) or not isinstance( - dist.owner.op, RandomVariable + dist.owner.op, (RandomVariable, SymbolicRandomVariable) ): raise ValueError( f"Component dist must be a distribution created via the `.dist()` API, got {type(dist)}" @@ -205,11 +207,6 @@ def dist(cls, w, comp_dists, **kwargs): w = at.as_tensor_variable(w) return super().dist([w, *comp_dists], **kwargs) - @classmethod - def ndim_supp(cls, weights, *components): - # We already checked that all components have the same support dimensionality - return components[0].owner.op.ndim_supp - @classmethod def rv_op(cls, weights, *components, size=None): # Create new rng for the mix_indexes internal RV @@ -293,16 +290,12 @@ def rv_op(cls, weights, *components, size=None): mix_op = MarginalMixtureRV( inputs=[mix_indexes_rng_, weights_, *components_], outputs=[mix_indexes_rng_next_, mix_out_], + ndim_supp=components[0].owner.op.ndim_supp, ) # Create the actual MarginalMixture variable mix_out = mix_op(mix_indexes_rng, weights, *components) - # Reference nodes to facilitate identification in other classmethods - mix_out.tag.weights = weights - mix_out.tag.components = components - mix_out.tag.choices_rng = mix_indexes_rng - return mix_out @classmethod @@ -314,33 +307,25 @@ def _resize_components(cls, size, *components): mix_size = components[0].shape[mix_axis] size = tuple(size) + (mix_size,) - return [change_rv_size(component, size) for component in components] + return [change_dist_size(component, size) for component in components] - @classmethod - def change_size(cls, rv, new_size, expand=False): - weights = rv.tag.weights - components = rv.tag.components - - if expand: - component = rv.tag.components[0] - # Old size is equal to `shape[:-ndim_supp]`, with care needed for `ndim_supp == 0` - size_dims = component.ndim - component.owner.op.ndim_supp - if len(rv.tag.components) == 1: - # If we have a single component, new size should ignore the mixture axis - # dimension, as that is not touched by `_resize_components` - size_dims -= 1 - old_size = components[0].shape[:size_dims] - new_size = to_tuple(new_size) + tuple(old_size) - - components = cls._resize_components(new_size, *components) - - return cls.rv_op(weights, *components, size=None) - - -@_get_measurable_outputs.register(MarginalMixtureRV) -def _get_measurable_outputs_MarginalMixtureRV(op, node): - # This tells Aeppl that the second output is the measurable one - return [node.outputs[1]] + +@_change_dist_size.register(MarginalMixtureRV) +def change_marginal_mixture_size(op, dist, new_size, expand=False): + weights, *components = dist.owner.inputs[1:] + + if expand: + component = components[0] + # Old size is equal to `shape[:-ndim_supp]`, with care needed for `ndim_supp == 0` + size_dims = component.ndim - component.owner.op.ndim_supp + if len(components) == 1: + # If we have a single component, new size should ignore the mixture axis + # dimension, as that is not touched by `_resize_components` + size_dims -= 1 + old_size = components[0].shape[:size_dims] + new_size = tuple(new_size) + tuple(old_size) + + return Mixture.rv_op(weights, *components, size=new_size) @_logprob.register(MarginalMixtureRV) diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index fcca5fccfa..a2dfb9500a 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -24,6 +24,7 @@ import numpy as np import scipy +from aeppl.logprob 
import _logprob from aesara.graph.basic import Apply, Constant, Variable from aesara.graph.op import Op from aesara.raise_op import Assert @@ -32,14 +33,14 @@ from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace from aesara.tensor.random.basic import dirichlet, multinomial, multivariate_normal from aesara.tensor.random.op import RandomVariable, default_supp_shape_from_params -from aesara.tensor.random.utils import broadcast_params, normalize_size_param +from aesara.tensor.random.utils import broadcast_params from aesara.tensor.slinalg import Cholesky, SolveTriangular from aesara.tensor.type import TensorType from scipy import linalg, stats import pymc as pm -from pymc.aesaraf import change_rv_size, floatX, intX +from pymc.aesaraf import floatX, intX from pymc.distributions import transforms from pymc.distributions.continuous import BoundedContinuous, ChiSquared, Normal from pymc.distributions.dist_math import ( @@ -49,10 +50,19 @@ logpow, multigammaln, ) -from pymc.distributions.distribution import Continuous, Discrete, moment +from pymc.distributions.distribution import ( + Continuous, + Discrete, + Distribution, + SymbolicRandomVariable, + _moment, + moment, +) from pymc.distributions.logprob import ignore_logprob from pymc.distributions.shape_utils import ( + _change_dist_size, broadcast_dist_samples_to, + change_dist_size, rv_size_is_none, to_tuple, ) @@ -1096,12 +1106,12 @@ def _lkj_normalizing_constant(eta, n): return result -class _LKJCholeskyCovRV(RandomVariable): - name = "_lkjcholeskycov" +class _LKJCholeskyCovBaseRV(RandomVariable): + name = "_lkjcholeskycovbase" ndim_supp = 1 ndims_params = [0, 0, 1] dtype = "floatX" - _print_name = ("_lkjcholeskycov", "\\operatorname{_lkjcholeskycov}") + _print_name = ("_lkjcholeskycovbase", "\\operatorname{_lkjcholeskycovbase}") def make_node(self, rng, size, dtype, n, eta, D): n = at.as_tensor_variable(n) @@ -1114,35 +1124,19 @@ def make_node(self, rng, size, dtype, n, eta, D): D = at.as_tensor_variable(D) - # We resize the sd_dist `D` automatically so that it has (size x n) independent - # draws which is what the `_LKJCholeskyCovRV.rng_fn` expects. This makes the - # random and logp methods equivalent, as the latter also assumes a unique value - # for each diagonal element. - # Since `eta` and `n` are forced to be scalars we don't need to worry about - # implied batched dimensions for the time being. 
- size = normalize_size_param(size) - if D.owner.op.ndim_supp == 0: - D = change_rv_size(D, at.concatenate((size, (n,)))) - else: - # The support shape must be `n` but we have no way of controlling it - D = change_rv_size(D, size) - return super().make_node(rng, size, dtype, n, eta, D) - def _infer_shape(self, size, dist_params, param_shapes=None): + def _supp_shape_from_params(self, dist_params, param_shapes): n = dist_params[0] - dist_shape = tuple(size) + ((n * (n + 1)) // 2,) - return dist_shape + return ((n * (n + 1)) // 2,) def rng_fn(self, rng, n, eta, D, size): # We flatten the size to make operations easier, and then rebuild it if size is None: - flat_size = 1 - else: - flat_size = np.prod(size) - - C = LKJCorrRV._random_corr_matrix(rng, n, eta, flat_size) + size = D.shape[:-1] + flat_size = np.prod(size).astype(int) + C = LKJCorrRV._random_corr_matrix(rng=rng, n=n, eta=eta, flat_size=flat_size) D = D.reshape(flat_size, n) C *= D[..., :, np.newaxis] * D[..., np.newaxis, :] @@ -1158,101 +1152,138 @@ def rng_fn(self, rng, n, eta, D, size): return samples -_ljk_cholesky_cov = _LKJCholeskyCovRV() +_ljk_cholesky_cov_base = _LKJCholeskyCovBaseRV() -class _LKJCholeskyCov(Continuous): +# _LKJCholeskyCovBaseRV requires a properly shaped `D`, which means the variable can't +# be safely resized. Because of this, we add the thin SymbolicRandomVariable wrapper +class _LKJCholeskyCovRV(SymbolicRandomVariable): + default_output = 1 + _print_name = ("_lkjcholeskycov", "\\operatorname{_lkjcholeskycov}") + + def update(self, node): + return {node.inputs[0]: node.outputs[0]} + + +class _LKJCholeskyCov(Distribution): r"""Underlying class for covariance matrix with LKJ distributed correlations. See docs for LKJCholeskyCov function for more details on how to use it in models. """ - rv_op = _ljk_cholesky_cov - def __new__(cls, name, eta, n, sd_dist, **kwargs): - check_dist_not_registered(sd_dist) - return super().__new__(cls, name, eta, n, sd_dist, **kwargs) + rv_type = _LKJCholeskyCovRV @classmethod - def dist(cls, eta, n, sd_dist, **kwargs): - eta = at.as_tensor_variable(floatX(eta)) + def dist(cls, n, eta, sd_dist, **kwargs): n = at.as_tensor_variable(intX(n)) + eta = at.as_tensor_variable(floatX(eta)) if not ( isinstance(sd_dist, Variable) and sd_dist.owner is not None - and isinstance(sd_dist.owner.op, RandomVariable) + and isinstance(sd_dist.owner.op, (RandomVariable, SymbolicRandomVariable)) and sd_dist.owner.op.ndim_supp < 2 ): raise TypeError("sd_dist must be a scalar or vector distribution variable") + check_dist_not_registered(sd_dist) # sd_dist is part of the generative graph, but should be completely ignored # by the logp graph, since the LKJ logp explicitly includes these terms. - # TODO: Things could be simplified a bit if we managed to extract the - # sd_dist prior components from the logp expression. sd_dist = ignore_logprob(sd_dist) - return super().dist([n, eta, sd_dist], **kwargs) - def moment(rv, size, n, eta, sd_dists): - diag_idxs = (at.cumsum(at.arange(1, n + 1)) - 1).astype("int32") - moment = at.zeros_like(rv) - moment = at.set_subtensor(moment[..., diag_idxs], 1) - return moment + @classmethod + def rv_op(cls, n, eta, sd_dist, size=None): + # We resize the sd_dist automatically so that it has (size x n) independent + # draws which is what the `_LKJCholeskyCovBaseRV.rng_fn` expects. This makes the + # random and logp methods equivalent, as the latter also assumes a unique value + # for each diagonal element. 
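# --- Illustrative sketch, not part of this diff ---
# Because of the resizing described above, `sd_dist` only needs to be a scalar (or
# length-n vector) template; `rv_op` broadcasts it to `(*size, n)` draws internally.
# Uses the private `_LKJCholeskyCov` class defined in this diff, for illustration only:
import pymc as pm
from pymc.distributions.multivariate import _LKJCholeskyCov

sd_dist = pm.Exponential.dist(1.0)  # scalar template, ndim_supp == 0
packed_chol = _LKJCholeskyCov.dist(n=3, eta=2.0, sd_dist=sd_dist)
assert pm.draw(packed_chol).shape == (6,)  # n * (n + 1) / 2 packed Cholesky entries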
+ # Since `eta` and `n` are forced to be scalars we don't need to worry about + # implied batched dimensions from those for the time being. + if size is None: + size = sd_dist.shape[:-1] + shape = tuple(size) + (n,) + if sd_dist.owner.op.ndim_supp == 0: + sd_dist = change_dist_size(sd_dist, shape) + else: + # The support shape must be `n` but we have no way of controlling it + sd_dist = change_dist_size(sd_dist, shape[:-1]) - def logp(value, n, eta, sd_dist): - """ - Calculate log-probability of Covariance matrix with LKJ - distributed correlations at specified value. + # Create new rng for the _lkjcholeskycov internal RV + rng = aesara.shared(np.random.default_rng()) - Parameters - ---------- - value: numeric - Value for which log-probability is calculated. + rng_, n_, eta_, sd_dist_ = rng.type(), n.type(), eta.type(), sd_dist.type() + next_rng_, lkjcov_ = _ljk_cholesky_cov_base(n_, eta_, sd_dist_, rng=rng_).owner.outputs - Returns - ------- - TensorVariable - """ + return _LKJCholeskyCovRV( + inputs=[rng_, n_, eta_, sd_dist_], + outputs=[next_rng_, lkjcov_], + ndim_supp=1, + )(rng, n, eta, sd_dist) - if value.ndim > 1: - raise ValueError("LKJCholeskyCov logp is only implemented for vector values (ndim=1)") - diag_idxs = at.cumsum(at.arange(1, n + 1)) - 1 - cumsum = at.cumsum(value**2) - variance = at.zeros(at.atleast_1d(n)) - variance = at.inc_subtensor(variance[0], value[0] ** 2) - variance = at.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) - sd_vals = at.sqrt(variance) +@_change_dist_size.register(_LKJCholeskyCovRV) +def change_LKJCholeksyCovRV_size(op, dist, new_size, expand=False): + n, eta, sd_dist = dist.owner.inputs[1:] - logp_sd = pm.logp(sd_dist, sd_vals).sum() - corr_diag = value[diag_idxs] / sd_vals + if expand: + old_size = sd_dist.shape[:-1] + new_size = tuple(new_size) + tuple(old_size) - logp_lkj = (2 * eta - 3 + n - at.arange(n)) * at.log(corr_diag) - logp_lkj = at.sum(logp_lkj) + return _LKJCholeskyCov.rv_op(n, eta, sd_dist, size=new_size) - # Compute the log det jacobian of the second transformation - # described in the docstring. 
- idx = at.arange(n) - det_invjac = at.log(corr_diag) - idx * at.log(sd_vals) - det_invjac = det_invjac.sum() - # TODO: _lkj_normalizing_constant currently requires `eta` and `n` to be constants - if not isinstance(n, Constant): - raise NotImplementedError("logp only implemented for constant `n`") - n = int(n.data) +@_moment.register(_LKJCholeskyCovRV) +def _LKJCholeksyCovRV_moment(op, rv, rng, n, eta, sd_dist): + diag_idxs = (at.cumsum(at.arange(1, n + 1)) - 1).astype("int32") + moment = at.zeros_like(rv) + moment = at.set_subtensor(moment[..., diag_idxs], 1) + return moment - if not isinstance(eta, Constant): - raise NotImplementedError("logp only implemented for constant `eta`") - eta = float(eta.data) - norm = _lkj_normalizing_constant(eta, n) +@_default_transform.register(_LKJCholeskyCovRV) +def _LKJCholeksyCovRV_default_transform(op, rv): + _, n, _, _ = rv.owner.inputs + return transforms.CholeskyCovPacked(n) - return norm + logp_lkj + logp_sd + det_invjac +@_logprob.register(_LKJCholeskyCovRV) +def _LKJCholeksyCovRV_logp(op, values, rng, n, eta, sd_dist, **kwargs): + (value,) = values -@_default_transform.register(_LKJCholeskyCov) -def lkjcholeskycov_default_transform(op, rv): - _, _, _, n, _, _ = rv.owner.inputs - return transforms.CholeskyCovPacked(n) + if value.ndim > 1: + raise ValueError("_LKJCholeskyCov logp is only implemented for vector values (ndim=1)") + + diag_idxs = at.cumsum(at.arange(1, n + 1)) - 1 + cumsum = at.cumsum(value**2) + variance = at.zeros(at.atleast_1d(n)) + variance = at.inc_subtensor(variance[0], value[0] ** 2) + variance = at.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) + sd_vals = at.sqrt(variance) + + logp_sd = pm.logp(sd_dist, sd_vals).sum() + corr_diag = value[diag_idxs] / sd_vals + + logp_lkj = (2 * eta - 3 + n - at.arange(n)) * at.log(corr_diag) + logp_lkj = at.sum(logp_lkj) + + # Compute the log det jacobian of the second transformation + # described in the docstring. + idx = at.arange(n) + det_invjac = at.log(corr_diag) - idx * at.log(sd_vals) + det_invjac = det_invjac.sum() + + # TODO: _lkj_normalizing_constant currently requires `eta` and `n` to be constants + if not isinstance(n, Constant): + raise NotImplementedError("logp only implemented for constant `n`") + n = int(n.data) + + if not isinstance(eta, Constant): + raise NotImplementedError("logp only implemented for constant `eta`") + eta = float(eta.data) + + norm = _lkj_normalizing_constant(eta, n) + + return norm + logp_lkj + logp_sd + det_invjac class LKJCholeskyCov: @@ -1461,7 +1492,7 @@ def rng_fn(cls, rng, n, eta, size): else: flat_size = np.prod(size) - C = cls._random_corr_matrix(rng, n, eta, flat_size) + C = cls._random_corr_matrix(rng=rng, n=n, eta=eta, flat_size=flat_size) triu_idx = np.triu_indices(n, k=1) samples = C[..., triu_idx[0], triu_idx[1]] diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 21a990c2ac..74a96ba4b1 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -17,12 +17,19 @@ A collection of common shape operations needed for broadcasting samples from probability distributions for stochastic nodes in PyMC. 
""" +import warnings +from functools import singledispatch from typing import Optional, Sequence, Tuple, Union import numpy as np +from aesara import config +from aesara import tensor as at from aesara.graph.basic import Variable +from aesara.graph.op import Op, compute_test_value +from aesara.tensor.random.op import RandomVariable +from aesara.tensor.shape import SpecifyShape from aesara.tensor.var import TensorVariable from typing_extensions import TypeAlias @@ -34,8 +41,12 @@ "broadcast_distribution_samples", "broadcast_dist_samples_to", "rv_size_is_none", + "change_dist_size", ] +from pymc.aesaraf import PotentialShapeType +from pymc.exceptions import ShapeError + def to_tuple(shape): """Convert ints, arrays, and Nones to tuples @@ -541,3 +552,117 @@ def find_size( def rv_size_is_none(size: Variable) -> bool: """Check wether an rv size is None (ie., at.Constant([]))""" return size.type.shape == (0,) # type: ignore [attr-defined] + + +@singledispatch +def _change_dist_size(op: Op, dist: TensorVariable, new_size, expand): + raise NotImplementedError( + f"Variable {dist} of type {op} has no _change_dist_size implementation." + ) + + +def change_dist_size( + dist: TensorVariable, + new_size: PotentialShapeType, + expand: bool = False, +) -> TensorVariable: + """Change or expand the size of a Distribution. + + Parameters + ---------- + dist: + The old distribution to be resized. + new_size: + The new size of the distribution. + expand: bool, optional + If True, `new_size` is prepended to the existing distribution `size`, so that + the final size is equal to (*new_size, *dist.size). Defaults to false. + + Returns + ------- + A new distribution variable that is equivalent to the original distribution with + the new size. The new distribution will not reuse the old RandomState/Generator + input, so it will be independent from the original distribution. + + Examples + -------- + .. code-block:: python + x = Normal.dist(shape=(2, 3)) + new_x = change_dist_size(x, new_size=(5, 3), expand=False) + assert new_x.eval().shape == (5, 3) + + new_x = change_dist_size(x, new_size=(5, 3), expand=True) + assert new_x.eval().shape == (5, 3, 2, 3) + + """ + # Check the dimensionality of the `new_size` kwarg + new_size_ndim = np.ndim(new_size) # type: ignore + if new_size_ndim > 1: + raise ShapeError("The `new_size` must be ≤1-dimensional.", actual=new_size_ndim) + elif new_size_ndim == 0: + new_size = (new_size,) # type: ignore + else: + new_size = tuple(new_size) # type: ignore + + new_dist = _change_dist_size(dist.owner.op, dist, new_size=new_size, expand=expand) + + new_dist.name = dist.name + for k, v in dist.tag.__dict__.items(): + new_dist.tag.__dict__.setdefault(k, v) + + if config.compute_test_value != "off": + compute_test_value(new_dist) + + return new_dist + + +@_change_dist_size.register(RandomVariable) +def change_rv_size(op, rv, new_size, expand) -> TensorVariable: + # Extract the RV node that is to be resized + rv_node = rv.owner + old_rng, old_size, dtype, *dist_params = rv_node.inputs + + if expand: + shape = tuple(rv_node.op._infer_shape(old_size, dist_params)) + old_size = shape[: len(shape) - rv_node.op.ndim_supp] + new_size = tuple(new_size) + tuple(old_size) + + # Make sure the new size is a tensor. This dtype-aware conversion helps + # to not unnecessarily pick up a `Cast` in some cases (see #4652). 
+ new_size = at.as_tensor(new_size, ndim=1, dtype="int64") + + new_rv = rv_node.op(*dist_params, size=new_size, dtype=dtype) + + # Replicate "traditional" rng default_update, if that was set for old_rng + default_update = getattr(old_rng, "default_update", None) + if default_update is not None: + if default_update is rv_node.outputs[0]: + new_rv.owner.inputs[0].default_update = new_rv.owner.outputs[0] + else: + warnings.warn( + f"Update expression of {rv} RNG could not be replicated in resized variable", + UserWarning, + ) + + return new_rv + + +@_change_dist_size.register(SpecifyShape) +def change_specify_shape_size(op, ss, new_size, expand) -> TensorVariable: + inner_var, *shapes = ss.owner.inputs + new_var = _change_dist_size(inner_var.owner.op, inner_var, new_size=new_size, expand=expand) + + new_shapes = [None] * new_var.ndim + # Old specify_shape is still valid + if expand: + if len(shapes) > 0: + new_shapes[-len(shapes) :] = shapes + # Old specify_shape is still valid for support dimensions. We do not reintroduce + # checks for resized dimensions, although we could... + else: + ndim_supp = new_var.owner.op.ndim_supp + if ndim_supp > 0: + new_shapes[-ndim_supp:] = shapes[-ndim_supp:] + + # specify_shape has a wrong signature https://github.com/aesara-devs/aesara/issues/1164 + return at.specify_shape(new_var, new_shapes) # type: ignore diff --git a/pymc/distributions/simulator.py b/pymc/distributions/simulator.py index a1c8d5d2a3..e29979def5 100644 --- a/pymc/distributions/simulator.py +++ b/pymc/distributions/simulator.py @@ -248,10 +248,10 @@ def logp(cls, value, sim_op, sim_inputs): # TODO: Model rngs should be updated prior to multiprocessing split, # in which case this would not be needed. However, that would have to be # done for every sampler that may accomodate Simulators - rng = aesara.shared(np.random.default_rng()) + rng = aesara.shared(np.random.default_rng(), name="simulator_rng") # Create a new simulatorRV with identical inputs as the original one sim_value = sim_op.make_node(rng, *sim_inputs[1:]).default_output() - sim_value.name = "sim_value" + sim_value.name = "simulator_value" return sim_op.distance( sim_op.epsilon, diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 9fbc509174..bb23cce574 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -13,47 +13,51 @@ # limitations under the License. 
import warnings -from typing import Any, Optional, Tuple, Union +from typing import Any, Optional, Union import aesara import aesara.tensor as at import numpy as np -from aeppl.abstract import MeasurableVariable, _get_measurable_outputs +from aeppl.abstract import _get_measurable_outputs from aeppl.logprob import _logprob from aesara import scan -from aesara.compile.builders import OpFromGraph from aesara.graph import FunctionGraph, rewrite_graph -from aesara.graph.basic import Node +from aesara.graph.basic import Node, clone_replace from aesara.raise_op import Assert from aesara.tensor import TensorVariable from aesara.tensor.random.op import RandomVariable -from aesara.tensor.random.utils import normalize_size_param from aesara.tensor.rewriting.basic import ShapeFeature, topo_constant_folding -from pymc.aesaraf import change_rv_size, convert_observed_data, floatX, intX +from pymc.aesaraf import convert_observed_data, floatX, intX from pymc.distributions import distribution, multivariate from pymc.distributions.continuous import Flat, Normal, get_tau_sigma -from pymc.distributions.dist_math import check_parameters -from pymc.distributions.distribution import SymbolicDistribution, _moment, moment +from pymc.distributions.distribution import ( + Distribution, + SymbolicRandomVariable, + _moment, + moment, +) from pymc.distributions.logprob import ignore_logprob, logp from pymc.distributions.shape_utils import ( Dims, Shape, + _change_dist_size, + change_dist_size, convert_dims, - rv_size_is_none, to_tuple, ) from pymc.model import modelcontext from pymc.util import check_dist_not_registered __all__ = [ - "AR", + "RandomWalk", "GaussianRandomWalk", - "GARCH11", - "EulerMaruyama", "MvGaussianRandomWalk", "MvStudentTRandomWalk", + "AR", + "GARCH11", + "EulerMaruyama", ] @@ -91,14 +95,12 @@ def get_steps( inferred_steps = None if shape is not None: shape = to_tuple(shape) - if shape[-1] is not ...: - inferred_steps = shape[-1] - step_shape_offset + inferred_steps = shape[-1] - step_shape_offset if inferred_steps is None and dims is not None: dims = convert_dims(dims) - if dims[-1] is not ...: - model = modelcontext(None) - inferred_steps = model.dim_lengths[dims[-1]] - step_shape_offset + model = modelcontext(None) + inferred_steps = model.dim_lengths[dims[-1]] - step_shape_offset if inferred_steps is None and observed is not None: observed = convert_observed_data(observed) @@ -114,109 +116,133 @@ def get_steps( return inferred_steps -class GaussianRandomWalkRV(RandomVariable): - """ - GaussianRandomWalk Random Variable - """ +class RandomWalkRV(SymbolicRandomVariable): + """RandomWalk Variable""" - name = "GaussianRandomWalk" - ndim_supp = 1 - ndims_params = [0, 0, 0, 0] - dtype = "floatX" - _print_name = ("GaussianRandomWalk", "\\operatorname{GaussianRandomWalk}") - - def make_node(self, rng, size, dtype, mu, sigma, init_dist, steps): - steps = at.as_tensor_variable(steps) - if not steps.ndim == 0 or not steps.dtype.startswith("int"): - raise ValueError("steps must be an integer scalar (ndim=0).") + default_output = 0 + _print_name = ("RandomWalk", "\\operatorname{RandomWalk}") - mu = at.as_tensor_variable(mu) - sigma = at.as_tensor_variable(sigma) - init_dist = at.as_tensor_variable(init_dist) - # Resize init distribution - size = normalize_size_param(size) - # If not explicit, size is determined by the shapes of mu, sigma, and init - init_dist_size = ( - size if not rv_size_is_none(size) else at.broadcast_shape(mu, sigma, init_dist) - ) - init_dist = change_rv_size(init_dist, 
init_dist_size) +class RandomWalk(Distribution): + r"""RandomWalk Distribution - return super().make_node(rng, size, dtype, mu, sigma, init_dist, steps) + TODO: Expand docstrings + """ - def _supp_shape_from_params(self, dist_params, reop_param_idx=0, param_shapes=None): - steps = dist_params[3] + rv_type = RandomWalkRV - return (steps + 1,) + def __new__(cls, *args, steps=None, **kwargs): + steps = get_steps( + steps=steps, + shape=None, # Shape will be checked in `cls.dist` + dims=kwargs.get("dims", None), + observed=kwargs.get("observed", None), + step_shape_offset=1, + ) + return super().__new__(cls, *args, steps=steps, **kwargs) @classmethod - def rng_fn( - cls, - rng: np.random.RandomState, - mu: Union[np.ndarray, float], - sigma: Union[np.ndarray, float], - init_dist: Union[np.ndarray, float], - steps: int, - size: Tuple[int], - ) -> np.ndarray: - """Gaussian Random Walk generator. - - The init value is drawn from the Normal distribution with the same sigma as the - innovations. - - Notes - ----- - Currently does not support custom init distribution - - Parameters - ---------- - rng: np.random.RandomState - Numpy random number generator - mu: array_like of float - Random walk mean - sigma: array_like of float - Standard deviation of innovation (sigma > 0) - init_dist: array_like of float - Initialization value for GaussianRandomWalk - steps: int - Length of random walk, must be greater than 1. Returned array will be of size+1 to - account as first value is initial value - size: tuple of int - The number of Random Walk time series generated + def dist(cls, init_dist, innovation_dist, steps=None, **kwargs) -> at.TensorVariable: + steps = get_steps( + steps=steps, + shape=kwargs.get("shape"), + step_shape_offset=1, + ) + if steps is None: + raise ValueError("Must specify steps or shape parameter") + steps = at.as_tensor_variable(intX(steps)) - Returns - ------- - ndarray - """ + if not ( + isinstance(init_dist, at.TensorVariable) + and init_dist.owner is not None + and isinstance(init_dist.owner.op, (RandomVariable, SymbolicRandomVariable)) + # TODO: Lift univariate constraint on init_dist + and init_dist.owner.op.ndim_supp == 0 + ): + raise TypeError("init_dist must be a univariate distribution variable") + check_dist_not_registered(init_dist) + + if not ( + isinstance(innovation_dist, at.TensorVariable) + and init_dist.owner is not None + and isinstance(init_dist.owner.op, (RandomVariable, SymbolicRandomVariable)) + # TODO: Lift univariate constraint on inovvation_dist + and init_dist.owner.op.ndim_supp == 0 + ): + raise TypeError("init_dist must be a univariate distribution variable") + check_dist_not_registered(init_dist) + + return super().dist([init_dist, innovation_dist, steps], **kwargs) - if steps < 1: - raise ValueError("Steps must be greater than 0") + @classmethod + def rv_op(cls, init_dist, innovation_dist, steps, size=None): + if not steps.ndim == 0 or not steps.dtype.startswith("int"): + raise ValueError("steps must be an integer scalar (ndim=0).") - # If size is None then the returned series should be (*implied_dims, 1+steps) + # If not explicit, size is determined by the shapes of the input distributions if size is None: - # broadcast parameters with each other to find implied dims - bcast_shape = np.broadcast_shapes( - np.asarray(mu).shape, - np.asarray(sigma).shape, - np.asarray(init_dist).shape, - ) - dist_shape = (*bcast_shape, int(steps)) - - # If size is None then the returned series should be (*size, 1+steps) - else: - dist_shape = (*size, int(steps)) - - # Add 
one dimension to the right, so that mu and sigma broadcast safely along - # the steps dimension - innovations = rng.normal(loc=mu[..., None], scale=sigma[..., None], size=dist_shape) - grw = np.concatenate([init_dist[..., None], innovations], axis=-1) - return np.cumsum(grw, axis=-1) - - -gaussianrandomwalk = GaussianRandomWalkRV() + size = at.broadcast_shape(init_dist, innovation_dist[..., 0]) + innovation_size = tuple(size) + (steps,) + + # Resize input distributions + init_dist = change_dist_size(init_dist, size) + innovation_dist = change_dist_size(innovation_dist, innovation_size) + + # Create SymbolicRV + init_dist_, innovation_dist_, steps_ = ( + init_dist.type(), + innovation_dist.type(), + steps.type(), + ) + grw_ = at.concatenate([init_dist_[..., None], innovation_dist_], axis=-1) + grw_ = at.cumsum(grw_, axis=-1) + return RandomWalkRV( + [init_dist_, innovation_dist_, steps_], + # We pass steps_ through just so we can keep a reference to it, even though + # it's no longer needed at this point + [grw_, steps_], + ndim_supp=1, + )(init_dist, innovation_dist, steps) + + +@_get_measurable_outputs.register(RandomWalkRV) +def _get_measurable_outputs_random_walk(op, node): + # Ignore steps output + return [node.default_output()] + + +@_change_dist_size.register(RandomWalkRV) +def change_random_walk_size(op, dist, new_size, expand): + init_dist, innovation_dist, steps = dist.owner.inputs + if expand: + old_size = init_dist.shape + new_size = tuple(new_size) + tuple(old_size) + return RandomWalk.rv_op(init_dist, innovation_dist, steps, size=new_size) + + +@_moment.register(RandomWalkRV) +def random_walk_moment(op, rv, init_dist, innovation_dist, steps): + grw_moment = at.zeros_like(rv) + grw_moment = at.set_subtensor(grw_moment[..., 0], moment(init_dist)) + grw_moment = at.set_subtensor(grw_moment[..., 1:], moment(innovation_dist)) + return at.cumsum(grw_moment, axis=-1) + + +@_logprob.register(RandomWalkRV) +def random_walk_logp(op, values, *inputs, **kwargs): + # ALthough Aeppl can derive the logprob of random walks, it does not collapse + # what PyMC considers the core dimension of steps. We do it manually here. + (value,) = values + # Recreate RV and obtain inner graph + rv_node = op.make_node(*inputs) + rv = clone_replace( + op.inner_outputs, replace={u: v for u, v in zip(op.inner_inputs, rv_node.inputs)} + )[op.default_output] + # Obtain logp via Aeppl of inner graph and collapse steps dimension + return logp(rv, value).sum(axis=-1) -class GaussianRandomWalk(distribution.Continuous): +class GaussianRandomWalk: r"""Random Walk with Normal innovations. Parameters @@ -236,33 +262,25 @@ class GaussianRandomWalk(distribution.Continuous): provided. 
""" - rv_op = gaussianrandomwalk - - def __new__(cls, *args, steps=None, **kwargs): - steps = get_steps( - steps=steps, - shape=None, # Shape will be checked in `cls.dist` - dims=kwargs.get("dims", None), - observed=kwargs.get("observed", None), - step_shape_offset=1, + def __new__(cls, name, mu=0.0, sigma=1.0, *, init_dist=None, steps=None, **kwargs): + init_dist, innovation_dist, kwargs = cls.get_dists( + mu=mu, sigma=sigma, init_dist=init_dist, **kwargs + ) + return RandomWalk( + name, init_dist=init_dist, innovation_dist=innovation_dist, steps=steps, **kwargs ) - return super().__new__(cls, *args, steps=steps, **kwargs) @classmethod def dist(cls, mu=0.0, sigma=1.0, *, init_dist=None, steps=None, **kwargs) -> at.TensorVariable: - - mu = at.as_tensor_variable(floatX(mu)) - sigma = at.as_tensor_variable(floatX(sigma)) - - steps = get_steps( - steps=steps, - shape=kwargs.get("shape"), - step_shape_offset=1, + init_dist, innovation_dist, kwargs = cls.get_dists( + mu=mu, sigma=sigma, init_dist=init_dist, **kwargs + ) + return RandomWalk.dist( + init_dist=init_dist, innovation_dist=innovation_dist, steps=steps, **kwargs ) - if steps is None: - raise ValueError("Must specify steps or shape parameter") - steps = at.as_tensor_variable(intX(steps)) + @classmethod + def get_dists(cls, *, mu, sigma, init_dist, **kwargs): if "init" in kwargs: warnings.warn( "init parameter is now called init_dist. Using init will raise an error in a future release.", @@ -278,60 +296,24 @@ def dist(cls, mu=0.0, sigma=1.0, *, init_dist=None, steps=None, **kwargs) -> at. UserWarning, ) init_dist = Normal.dist(0, 100) - else: - if not ( - isinstance(init_dist, at.TensorVariable) - and init_dist.owner is not None - and isinstance(init_dist.owner.op, RandomVariable) - and init_dist.owner.op.ndim_supp == 0 - ): - raise TypeError("init must be a univariate distribution variable") - check_dist_not_registered(init_dist) - # Ignores logprob of init var because that's accounted for in the logp method - init_dist = ignore_logprob(init_dist) - - return super().dist([mu, sigma, init_dist, steps], **kwargs) - - def moment(rv, size, mu, sigma, init_dist, steps): - grw_moment = at.zeros_like(rv) - grw_moment = at.set_subtensor(grw_moment[..., 0], moment(init_dist)) - # Add one dimension to the right, so that mu broadcasts safely along the steps - # dimension - grw_moment = at.set_subtensor(grw_moment[..., 1:], mu[..., None]) - return at.cumsum(grw_moment, axis=-1) - - def logp( - value: at.Variable, - mu: at.Variable, - sigma: at.Variable, - init_dist: at.Variable, - steps: at.Variable, - ) -> at.TensorVariable: - """Calculate log-probability of Gaussian Random Walk distribution at specified value.""" - - # Calculate initialization logp - init_logp = logp(init_dist, value[..., 0]) - - # Make time series stationary around the mean value - stationary_series = value[..., 1:] - value[..., :-1] # Add one dimension to the right, so that mu and sigma broadcast safely along # the steps dimension - series_logp = logp(Normal.dist(mu[..., None], sigma[..., None]), stationary_series) + mu = at.as_tensor_variable(mu) + sigma = at.as_tensor_variable(sigma) + innovation_dist = Normal.dist(mu=mu[..., None], sigma=sigma[..., None]) - return check_parameters( - init_logp + series_logp.sum(axis=-1), - steps > 0, - msg="steps > 0", - ) + return init_dist, innovation_dist, kwargs -class AutoRegressiveRV(OpFromGraph): +class AutoRegressiveRV(SymbolicRandomVariable): """A placeholder used to specify a log-likelihood for an AR sub-graph.""" + _print_name = 
("AR", "\\operatorname{AR}") default_output = 1 ar_order: int constant_term: bool + _print_name = ("AR", "\\operatorname{AR}") def __init__(self, *args, ar_order, constant_term, **kwargs): self.ar_order = ar_order @@ -344,7 +326,7 @@ def update(self, node: Node): return {node.inputs[-1]: node.outputs[0]} -class AR(SymbolicDistribution): +class AR(Distribution): r"""Autoregressive process with p lags. .. math:: @@ -404,6 +386,8 @@ class AR(SymbolicDistribution): """ + rv_type = AutoRegressiveRV + def __new__(cls, name, rho, *args, steps=None, constant=False, ar_order=None, **kwargs): rhos = at.atleast_1d(at.as_tensor_variable(floatX(rho))) ar_order = cls._get_ar_order(rhos=rhos, constant=constant, ar_order=ar_order) @@ -450,7 +434,7 @@ def dist( if init_dist is not None: if not isinstance(init_dist, TensorVariable) or not isinstance( - init_dist.owner.op, RandomVariable + init_dist.owner.op, (RandomVariable, SymbolicRandomVariable) ): raise ValueError( f"Init dist must be a distribution created via the `.dist()` API, " @@ -523,13 +507,13 @@ def rv_op(cls, rhos, sigma, init_dist, steps, ar_order, constant_term, size=None else: # In this case the size of the init_dist depends on the parameters shape # The last dimension of rho and init_dist does not matter - batch_size = at.broadcast_shape(sigma, rhos[..., 0], init_dist[..., 0]) + batch_size = at.broadcast_shape(sigma, rhos[..., 0], at.atleast_1d(init_dist)[..., 0]) if init_dist.owner.op.ndim_supp == 0: init_dist_size = (*batch_size, ar_order) else: # In this case the support dimension must cover for ar_order init_dist_size = batch_size - init_dist = change_rv_size(init_dist, init_dist_size) + init_dist = change_dist_size(init_dist, init_dist_size) # Create OpFromGraph representing random draws form AR process # Variables with underscore suffix are dummy inputs into the OpFromGraph @@ -571,35 +555,26 @@ def step(*args): outputs=[noise_next_rng, ar_], ar_order=ar_order, constant_term=constant_term, - inline=True, + ndim_supp=1, ) ar = ar_op(rhos, sigma, init_dist, steps) return ar - @classmethod - def change_size(cls, rv, new_size, expand=False): - - if expand: - old_size = rv.shape[:-1] - new_size = at.concatenate([new_size, old_size]) - - op = rv.owner.op - return cls.rv_op( - *rv.owner.inputs, - ar_order=op.ar_order, - constant_term=op.constant_term, - size=new_size, - ) +@_change_dist_size.register(AutoRegressiveRV) +def change_ar_size(op, dist, new_size, expand=False): -MeasurableVariable.register(AutoRegressiveRV) + if expand: + old_size = dist.shape[:-1] + new_size = tuple(new_size) + tuple(old_size) - -@_get_measurable_outputs.register(AutoRegressiveRV) -def _get_measurable_outputs_ar(op, node): - # This tells Aeppl that the second output is the measurable one - return [node.outputs[1]] + return AR.rv_op( + *dist.owner.inputs[:-1], + ar_order=op.ar_order, + constant_term=op.constant_term, + size=new_size, + ) @_logprob.register(AutoRegressiveRV) diff --git a/pymc/initial_point.py b/pymc/initial_point.py index 11e80d0fa2..7b09f856b1 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -24,7 +24,7 @@ from aesara.graph.fg import FunctionGraph from aesara.tensor.var import TensorVariable -from pymc.aesaraf import compile_pymc, find_rng_nodes, reseed_rngs +from pymc.aesaraf import compile_pymc, find_rng_nodes, replace_rng_nodes, reseed_rngs from pymc.util import get_transformed_name, get_untransformed_name, is_transformed_name StartDict = Dict[Union[Variable, str], Union[np.ndarray, Variable, str]] @@ -167,18 +167,8 @@ def 
make_initial_point_fn( # Replace original rng shared variables so that we don't mess with them # when calling the final seeded function - graph = FunctionGraph(outputs=initial_values, clone=False) - rng_nodes = find_rng_nodes(graph.outputs) - new_rng_nodes: List[Union[np.random.RandomState, np.random.Generator]] = [] - for rng_node in rng_nodes: - rng_cls: type - if isinstance(rng_node, at.random.var.RandomStateSharedVariable): - rng_cls = np.random.RandomState - else: - rng_cls = np.random.Generator - new_rng_nodes.append(aesara.shared(rng_cls(np.random.PCG64()))) - graph.replace_all(zip(rng_nodes, new_rng_nodes), import_missing=True) - func = compile_pymc(inputs=[], outputs=graph.outputs, mode=aesara.compile.mode.FAST_COMPILE) + initial_values = replace_rng_nodes(initial_values) + func = compile_pymc(inputs=[], outputs=initial_values, mode=aesara.compile.mode.FAST_COMPILE) varnames = [] for var in model.free_RVs: diff --git a/pymc/printing.py b/pymc/printing.py index 87c43924ec..b89d342b62 100644 --- a/pymc/printing.py +++ b/pymc/printing.py @@ -20,6 +20,10 @@ from aesara.tensor.basic import TensorVariable, Variable from aesara.tensor.elemwise import DimShuffle from aesara.tensor.random.basic import RandomVariable +from aesara.tensor.random.var import ( + RandomGeneratorSharedVariable, + RandomStateSharedVariable, +) from aesara.tensor.var import TensorConstant from pymc.model import Model @@ -31,40 +35,62 @@ ] -def str_for_dist(rv: TensorVariable, formatting: str = "plain", include_params: bool = True) -> str: - """Make a human-readable string representation of a RandomVariable in a model, either +def str_for_dist( + dist: TensorVariable, formatting: str = "plain", include_params: bool = True +) -> str: + """Make a human-readable string representation of a Distribution in a model, either LaTeX or plain, optionally with distribution parameter values included.""" if include_params: # first 3 args are always (rng, size, dtype), rest is relevant for distribution - dist_args = [_str_for_input_var(x, formatting=formatting) for x in rv.owner.inputs[3:]] + if isinstance(dist.owner.op, RandomVariable): + dist_args = [ + _str_for_input_var(x, formatting=formatting) for x in dist.owner.inputs[3:] + ] + else: + dist_args = [ + _str_for_input_var(x, formatting=formatting) + for x in dist.owner.inputs + if not isinstance(x, (RandomStateSharedVariable, RandomGeneratorSharedVariable)) + ] + + print_name = dist.name - print_name = rv.name if rv.name is not None else "" if "latex" in formatting: - print_name = r"\text{" + _latex_escape(print_name) + "}" - dist_name = rv.owner.op._print_name[1] + if print_name is not None: + print_name = r"\text{" + _latex_escape(dist.name) + "}" + + op_name = ( + dist.owner.op._print_name[1] + if hasattr(dist.owner.op, "_print_name") + else r"\\operatorname{Unknown}" + ) if include_params: - return r"${} \sim {}({})$".format(print_name, dist_name, ",~".join(dist_args)) + if print_name: + return r"${} \sim {}({})$".format(print_name, op_name, ",~".join(dist_args)) + else: + return r"${}({})$".format(op_name, ",~".join(dist_args)) + else: - return rf"${print_name} \sim {dist_name}$" + if print_name: + return rf"${print_name} \sim {op_name}$" + else: + return rf"${op_name}$" + else: # plain - dist_name = rv.owner.op._print_name[0] + dist_name = ( + dist.owner.op._print_name[0] if hasattr(dist.owner.op, "_print_name") else "Unknown" + ) if include_params: - return r"{} ~ {}({})".format(print_name, dist_name, ", ".join(dist_args)) + if print_name: + return r"{} ~ 
{}({})".format(print_name, dist_name, ", ".join(dist_args)) + else: + return r"{}({})".format(dist_name, ", ".join(dist_args)) else: - return rf"{print_name} ~ {dist_name}" - - -def str_for_symbolic_dist( - rv: TensorVariable, formatting: str = "plain", include_params: bool = True -) -> str: - """Make a human-readable string representation of a SymbolicDistribution in a model, - either LaTeX or plain, optionally with distribution parameter values included.""" - - if "latex" in formatting: - return rf"$\text{{{rv.name}}} \sim \text{{{rv.owner.op}}}$" - else: - return rf"{rv.name} ~ {rv.owner.op}" + if print_name: + return rf"{print_name} ~ {dist_name}" + else: + return dist_name def str_for_model(model: Model, formatting: str = "plain", include_params: bool = True) -> str: @@ -125,6 +151,9 @@ def str_for_potential_or_deterministic( def _str_for_input_var(var: Variable, formatting: str) -> str: + # Avoid circular import + from pymc.distributions.distribution import SymbolicRandomVariable + def _is_potential_or_determinstic(var: Variable) -> bool: try: return var.str_repr.__func__.func is str_for_potential_or_deterministic @@ -134,7 +163,9 @@ def _is_potential_or_determinstic(var: Variable) -> bool: if isinstance(var, TensorConstant): return _str_for_constant(var, formatting) - elif isinstance(var.owner.op, RandomVariable) or _is_potential_or_determinstic(var): + elif isinstance( + var.owner.op, (RandomVariable, SymbolicRandomVariable) + ) or _is_potential_or_determinstic(var): # show the names for RandomVariables, Deterministics, and Potentials, rather # than the full expression return _str_for_input_rv(var, formatting) @@ -145,7 +176,11 @@ def _is_potential_or_determinstic(var: Variable) -> bool: def _str_for_input_rv(var: Variable, formatting: str) -> str: - _str = var.name if var.name is not None else "" + _str = ( + var.name + if var.name is not None + else str_for_dist(var, formatting=formatting, include_params=True) + ) if "latex" in formatting: return r"\text{" + _latex_escape(_str) + "}" else: @@ -164,15 +199,18 @@ def _str_for_constant(var: TensorConstant, formatting: str) -> str: def _str_for_expression(var: Variable, formatting: str) -> str: + # Avoid circular import + from pymc.distributions.distribution import SymbolicRandomVariable + # construct a string like f(a1, ..., aN) listing all random variables a as arguments def _expand(x): - if x.owner and (not isinstance(x.owner.op, RandomVariable)): + if x.owner and (not isinstance(x.owner.op, (RandomVariable, SymbolicRandomVariable))): return reversed(x.owner.inputs) parents = [ x for x in walk(nodes=var.owner.inputs, expand=_expand) - if x.owner and isinstance(x.owner.op, RandomVariable) + if x.owner and isinstance(x.owner.op, (RandomVariable, SymbolicRandomVariable)) ] names = [x.name for x in parents] diff --git a/pymc/sampling.py b/pymc/sampling.py index 0322ed7f2d..20100de25e 100644 --- a/pymc/sampling.py +++ b/pymc/sampling.py @@ -45,7 +45,6 @@ from aesara import tensor as at from aesara.graph.basic import Apply, Constant, Variable, general_toposort, walk from aesara.graph.fg import FunctionGraph -from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.var import ( RandomGeneratorSharedVariable, RandomStateSharedVariable, @@ -1701,10 +1700,7 @@ def compile_forward_sampling_function( and not isinstance(node, (RandomStateSharedVariable, RandomGeneratorSharedVariable)) ) or ( # Basic RVs that are not in the trace - node.owner - and isinstance(node.owner.op, RandomVariable) - and node in basic_rvs - and node 
not in vars_in_trace + node in basic_rvs and node not in vars_in_trace ) or ( # Variables that have any volatile input node.owner and any(inp in volatile_nodes for inp in node.owner.inputs) diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index 418c65eb2d..e036b0f667 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -23,7 +23,14 @@ import pymc as pm -from pymc.aesaraf import compile_pymc, floatX, rvs_to_value_vars +from pymc.aesaraf import ( + CallableTensor, + compile_pymc, + floatX, + join_nonshared_inputs, + replace_rng_nodes, + rvs_to_value_vars, +) from pymc.blocking import DictToArrayBijection, RaveledVars from pymc.step_methods.arraystep import ( ArrayStep, @@ -1046,12 +1053,14 @@ def sample_except(limit, excluded): def delta_logp(point, logp, vars, shared): - [logp0], inarray0 = pm.join_nonshared_inputs(point, [logp], vars, shared) + [logp0], inarray0 = join_nonshared_inputs(point, [logp], vars, shared) tensor_type = inarray0.type inarray1 = tensor_type("inarray1") - logp1 = pm.CallableTensor(logp0)(inarray1) + logp1 = CallableTensor(logp0)(inarray1) + # Replace any potential duplicated RNG nodes + (logp1,) = replace_rng_nodes((logp1,)) f = compile_pymc([inarray1, inarray0], logp1 - logp0) f.trust_input = True diff --git a/pymc/tests/distributions/test_censored.py b/pymc/tests/distributions/test_censored.py index a62f4eab65..90bac6714e 100644 --- a/pymc/tests/distributions/test_censored.py +++ b/pymc/tests/distributions/test_censored.py @@ -17,6 +17,8 @@ import pymc as pm +from pymc.distributions.shape_utils import change_dist_size + class TestCensored: @pytest.mark.parametrize("censored", (False, True)) @@ -88,13 +90,13 @@ def test_censored_invalid_dist(self): ): x = pm.Censored("x", registered_dist, lower=None, upper=None) - def test_change_size(self): + def test_change_dist_size(self): base_dist = pm.Censored.dist(pm.Normal.dist(), -1, 1, size=(3, 2)) - new_dist = pm.Censored.change_size(base_dist, (4,)) + new_dist = change_dist_size(base_dist, (4,)) assert new_dist.eval().shape == (4,) - new_dist = pm.Censored.change_size(base_dist, (4,), expand=True) + new_dist = change_dist_size(base_dist, (4,), expand=True) assert new_dist.eval().shape == (4, 3, 2) def test_dist_broadcasted_by_lower_upper(self): diff --git a/pymc/tests/distributions/test_discrete.py b/pymc/tests/distributions/test_discrete.py index 70ea64eb5c..a885170ba2 100644 --- a/pymc/tests/distributions/test_discrete.py +++ b/pymc/tests/distributions/test_discrete.py @@ -891,7 +891,7 @@ class TestLogitBinomial(BaseTestDistributionRandom): pymc_dist = pm.Binomial pymc_dist_params = {"n": 100, "logit_p": 0.5} expected_rv_op_params = {"n": 100, "p": sp.expit(0.5)} - tests_to_run = ["check_pymc_params_match_rv_op"] + checks_to_run = ["check_pymc_params_match_rv_op"] @pytest.mark.parametrize( "n, p, logit_p, expected", @@ -968,7 +968,10 @@ class TestLogitCategorical(BaseTestDistributionRandom): expected_rv_op_params = { "p": sp.softmax(np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]]), axis=-1) } - tests_to_run = [ + sizes_to_check = [None, (), (2,), (4, 2), (1, 2)] + sizes_expected = [(2,), (2,), (2,), (4, 2), (1, 2)] + + checks_to_run = [ "check_pymc_params_match_rv_op", "check_rv_size", ] diff --git a/pymc/tests/distributions/test_distribution.py b/pymc/tests/distributions/test_distribution.py index 23735656ac..0cea2916f5 100644 --- a/pymc/tests/distributions/test_distribution.py +++ b/pymc/tests/distributions/test_distribution.py @@ -20,12 +20,13 @@ 
import numpy.testing as npt import pytest +from aeppl.abstract import get_measurable_outputs from aesara.tensor import TensorVariable import pymc as pm -from pymc.distributions import MvNormal, MvStudentT, joint_logp, logp -from pymc.distributions.distribution import _moment, moment +from pymc.distributions import DiracDelta, Flat, MvNormal, MvStudentT, joint_logp, logp +from pymc.distributions.distribution import SymbolicRandomVariable, _moment, moment from pymc.distributions.shape_utils import to_tuple from pymc.tests.distributions.util import assert_moment_is_expected @@ -93,7 +94,7 @@ def test_all_distributions_have_moments(): dists = (getattr(dist_module, dist) for dist in dist_module.__all__) dists = (dist for dist in dists if isinstance(dist, DistributionMeta)) missing_moments = { - dist for dist in dists if type(getattr(dist, "rv_op", None)) not in _moment.registry + dist for dist in dists if getattr(dist, "rv_type", None) not in _moment.registry } # Ignore super classes @@ -111,6 +112,7 @@ def test_all_distributions_have_moments(): dist_module.timeseries.GARCH11, dist_module.timeseries.MvGaussianRandomWalk, dist_module.timeseries.MvStudentTRandomWalk, + dist_module.timeseries.EulerMaruyama, } # Distributions that have been refactored but don't yet have moments @@ -302,3 +304,31 @@ def _random(mu, rng=None, size=None): match="Cannot safely infer the size of a multivariate random variable's moment.", ): evaled_moment = moment(a).eval({mu: mu_val}) + + +class TestSymbolicRandomVarible: + def test_inline(self): + class TestSymbolicRV(SymbolicRandomVariable): + pass + + x = TestSymbolicRV([], [Flat.dist()], ndim_supp=0)() + + # By default, the SymbolicRandomVariable will not be inlined. Because we did not + # dispatch a custom logprob function it will raise next + with pytest.raises(NotImplementedError): + logp(x, 0) + + class TestInlinedSymbolicRV(SymbolicRandomVariable): + inline_aeppl = True + + x_inline = TestInlinedSymbolicRV([], [Flat.dist()], ndim_supp=0)() + assert np.isclose(logp(x_inline, 0).eval(), 0) + + def test_measurable_outputs(self): + class TestSymbolicRV(SymbolicRandomVariable): + pass + + next_rng_, dirac_delta_ = DiracDelta.dist(5).owner.outputs + next_rng, dirac_delta = TestSymbolicRV([], [next_rng_, dirac_delta_], ndim_supp=0)() + node = dirac_delta.owner + assert get_measurable_outputs(node.op, node) == [dirac_delta] diff --git a/pymc/tests/distributions/test_logprob.py b/pymc/tests/distributions/test_logprob.py index 731226423a..26bc0c4ff7 100644 --- a/pymc/tests/distributions/test_logprob.py +++ b/pymc/tests/distributions/test_logprob.py @@ -378,3 +378,17 @@ def test_hierarchical_obs_logp(): ops = {a.owner.op for a in logp_ancestors if a.owner} assert len(ops) > 0 assert not any(isinstance(o, RandomVariable) for o in ops) + + +def test_logprob_join_constant_shapes(): + x = at.random.normal(size=5) + y = at.random.normal(size=3) + xy = at.join(x, y) + xy_vv = at.vector("xy_vv") + + xy_logp = logp(xy, xy_vv) + # This is what Aeppl does not do! 
+ assert_no_rvs(xy_logp) + + f = aesara.function([xy_vv], xy_logp) + np.testing.assert_array_equal(f(np.zeros(8)), sp.norm.logpdf(np.zeros(8))) diff --git a/pymc/tests/distributions/test_mixture.py b/pymc/tests/distributions/test_mixture.py index dd162aa487..ca81a4c718 100644 --- a/pymc/tests/distributions/test_mixture.py +++ b/pymc/tests/distributions/test_mixture.py @@ -51,7 +51,7 @@ ) from pymc.distributions.logprob import logp from pymc.distributions.mixture import MixtureTransformWarning -from pymc.distributions.shape_utils import to_tuple +from pymc.distributions.shape_utils import change_dist_size, to_tuple from pymc.distributions.transforms import _default_transform from pymc.math import expand_packed_triangular from pymc.model import Model @@ -387,21 +387,21 @@ def test_components_expanded_by_weights(self, comp_dists): ), ) @pytest.mark.parametrize("expand", (False, True)) - def test_change_size(self, comp_dists, expand): + def test_change_dist_size(self, comp_dists, expand): if isinstance(comp_dists, list): univariate = comp_dists[0].owner.op.ndim_supp == 0 else: univariate = comp_dists.owner.op.ndim_supp == 0 mix = Mixture.dist(w=Dirichlet.dist([1, 1]), comp_dists=comp_dists) - mix = Mixture.change_size(mix, new_size=(4,), expand=expand) + mix = change_dist_size(mix, new_size=(4,), expand=expand) draws = mix.eval() expected_shape = (4,) if univariate else (4, 3) assert draws.shape == expected_shape assert np.unique(draws).size == draws.size mix = Mixture.dist(w=Dirichlet.dist([1, 1]), comp_dists=comp_dists, size=(3,)) - mix = Mixture.change_size(mix, new_size=(5, 4), expand=expand) + mix = change_dist_size(mix, new_size=(5, 4), expand=expand) draws = mix.eval() expected_shape = (5, 4) if univariate else (5, 4, 3) if expand: @@ -599,13 +599,17 @@ def test_list_mvnormals_predictive_sampling_shape(self): assert prior["mu0"].shape == (n_samples, D) assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2) - @pytest.mark.xfail(reason="Nested mixtures not refactored yet") def test_nested_mixture(self): if aesara.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 nbr = 4 + + norm_x = generate_normal_mixture_data( + np.r_[0.75, 0.25], np.r_[0.0, 5.0], np.r_[1.0, 1.0], size=1000 + ) + with Model() as model: # mixtures components g_comp = Normal.dist( @@ -622,7 +626,7 @@ def test_nested_mixture(self): l_mix = Mixture.dist(w=l_w, comp_dists=l_comp) # mixture of mixtures mix_w = Dirichlet("mix_w", a=floatX(np.ones(2)), transform=None, shape=(2,)) - mix = Mixture("mix", w=mix_w, comp_dists=[g_mix, l_mix], observed=np.exp(self.norm_x)) + mix = Mixture("mix", w=mix_w, comp_dists=[g_mix, l_mix], observed=np.exp(norm_x)) test_point = model.initial_point() @@ -658,21 +662,23 @@ def mixmixlogp(value, point): ) return priorlogp, mixmixlogpg - value = np.exp(self.norm_x)[:, None] + value = np.exp(norm_x)[:, None] priorlogp, mixmixlogpg = mixmixlogp(value, test_point) # check logp of mixture - assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol) + mix_logp_fn = model.compile_logp(vars=[mix], sum=False) + assert_allclose(mixmixlogpg, mix_logp_fn(test_point)[0], rtol=rtol) # check model logp - assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol) + model_logp_fn = model.compile_logp() + assert_allclose(priorlogp + mixmixlogpg.sum(), model_logp_fn(test_point), rtol=rtol) # check input and check logp again test_point["g_w"] = np.asarray([0.1, 0.1, 0.2, 0.6]) test_point["mu_g"] = np.exp(np.random.randn(nbr)) priorlogp, mixmixlogpg = 
mixmixlogp(value, test_point) - assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point), rtol=rtol) - assert_allclose(priorlogp + mixmixlogpg.sum(), model.logp(test_point), rtol=rtol) + assert_allclose(mixmixlogpg, mix_logp_fn(test_point)[0], rtol=rtol) + assert_allclose(priorlogp + mixmixlogpg.sum(), model_logp_fn(test_point), rtol=rtol) def test_iterable_single_component_warning(self): with warnings.catch_warnings(): @@ -847,7 +853,6 @@ def ref_rand(size, w, mu, sigma): extra_args={"comp_shape": 2}, size=1000, ref_rand=ref_rand, - change_rv_size_fn=Mixture.change_size, ) pymc_random( NormalMixture, @@ -859,7 +864,6 @@ def ref_rand(size, w, mu, sigma): extra_args={"comp_shape": 3}, size=1000, ref_rand=ref_rand, - change_rv_size_fn=Mixture.change_size, ) diff --git a/pymc/tests/distributions/test_multivariate.py b/pymc/tests/distributions/test_multivariate.py index e1d1d24298..76052513df 100644 --- a/pymc/tests/distributions/test_multivariate.py +++ b/pymc/tests/distributions/test_multivariate.py @@ -38,8 +38,9 @@ _OrderedMultinomial, quaddist_matrix, ) -from pymc.distributions.shape_utils import to_tuple +from pymc.distributions.shape_utils import change_dist_size, to_tuple from pymc.math import kronecker +from pymc.sampling import draw from pymc.tests.distributions.util import ( BaseTestDistributionRandom, Domain, @@ -52,6 +53,7 @@ Vector, assert_moment_is_expected, check_logp, + pymc_random, seeded_numpy_distribution_builder, ) from pymc.tests.helpers import select_by_precision @@ -880,6 +882,18 @@ def test_sd_dist_automatically_resized(self, sd_dist, size, shape): # LKJCov has support shape `(n * (n+1)) // 2` assert x.eval().shape == (10, 6) + def test_change_dist_size(self): + x1 = pm.LKJCholeskyCov.dist( + n=3, eta=1, sd_dist=pm.Dirichlet.dist(np.ones(3)), size=(5,), compute_corr=False + ) + x2 = change_dist_size(x1, new_size=(10, 3), expand=False) + x3 = change_dist_size(x2, new_size=(3,), expand=True) + + draw_x1, draw_x2, draw_x3 = pm.draw([x1, x2, x3]) + assert draw_x1.shape == (5, 6) + assert draw_x2.shape == (10, 3, 6) + assert draw_x3.shape == (3, 10, 3, 6) + # Used for MvStudentT moment test rand1d = np.random.rand(2) @@ -1757,7 +1771,7 @@ class TestLKJCorr(BaseTestDistributionRandom): (2, 4, 2, 3), ] - tests_to_run = [ + checks_to_run = [ "check_pymc_params_match_rv_op", "check_rv_size", "check_draws_match_expected", @@ -1782,7 +1796,7 @@ def ref_rand(size, n, eta): class TestLKJCholeskyCov(BaseTestDistributionRandom): pymc_dist = _LKJCholeskyCov - pymc_dist_params = {"eta": 1.0, "n": 3, "sd_dist": pm.DiracDelta.dist([0.5, 1.0, 2.0])} + pymc_dist_params = {"n": 3, "eta": 1.0, "sd_dist": pm.DiracDelta.dist([0.5, 1.0, 2.0])} expected_rv_op_params = {"n": 3, "eta": 1.0, "sd_dist": pm.DiracDelta.dist([0.5, 1.0, 2.0])} size = None @@ -1797,11 +1811,16 @@ class TestLKJCholeskyCov(BaseTestDistributionRandom): (2, 4, 2, 6), ] - tests_to_run = [ + checks_to_run = [ "check_rv_size", "check_draws_match_expected", ] + def _instantiate_pymc_rv(self, dist_params=None): + # RNG cannot be passed through the PyMC class + params = dist_params if dist_params else self.pymc_dist_params + self.pymc_rv = self.pymc_dist.dist(**params, size=self.size) + def check_rv_size(self): for size, expected in zip(self.sizes_to_check, self.sizes_expected): sd_dist = pm.Exponential.dist(1, size=(*to_tuple(size), 3)) @@ -1812,9 +1831,9 @@ def check_rv_size(self): def check_draws_match_expected(self): # TODO: Find better comparison: - rng = aesara.shared(self.get_random_state(reset=True)) - x = 
_LKJCholeskyCov.dist(n=2, eta=10_000, sd_dist=pm.DiracDelta.dist([0.5, 2.0]), rng=rng) - assert np.all(np.abs(x.eval() - np.array([0.5, 0, 2.0])) < 0.01) + rng = self.get_random_state(reset=True) + x = _LKJCholeskyCov.dist(n=2, eta=10_000, sd_dist=pm.DiracDelta.dist([0.5, 2.0])) + assert np.all(np.abs(draw(x, random_seed=rng) - np.array([0.5, 0, 2.0])) < 0.01) @pytest.mark.parametrize("sparse", [True, False]) diff --git a/pymc/tests/distributions/test_shape_utils.py b/pymc/tests/distributions/test_shape_utils.py index 77c8ea35ba..8dad510a85 100644 --- a/pymc/tests/distributions/test_shape_utils.py +++ b/pymc/tests/distributions/test_shape_utils.py @@ -11,21 +11,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import re import warnings import aesara import numpy as np import pytest +from aesara import Mode from aesara import tensor as at +from aesara.graph import Constant, ancestors +from aesara.tensor.random import normal +from aesara.tensor.shape import SpecifyShape import pymc as pm +from pymc import ShapeError from pymc.distributions.shape_utils import ( broadcast_dist_samples_shape, broadcast_dist_samples_to, broadcast_distribution_samples, + change_dist_size, convert_dims, convert_shape, convert_size, @@ -496,3 +502,123 @@ def test_rv_size_is_none(): size = pm.Normal.dist(0, 1).size rv = pm.Normal.dist(0, 1, size=size) assert not rv_size_is_none(rv.owner.inputs[1]) + + +def test_change_rv_size(): + loc = at.as_tensor_variable([1, 2]) + rng = aesara.shared(np.random.default_rng()) + rv = normal(loc=loc, rng=rng) + assert rv.ndim == 1 + assert tuple(rv.shape.eval()) == (2,) + + with pytest.raises(ShapeError, match="must be ≤1-dimensional"): + change_dist_size(rv, new_size=[[2, 3]]) + with pytest.raises(ShapeError, match="must be ≤1-dimensional"): + change_dist_size(rv, new_size=at.as_tensor_variable([[2, 3], [4, 5]])) + + rv_new = change_dist_size(rv, new_size=(3,), expand=True) + assert rv_new.ndim == 2 + assert tuple(rv_new.shape.eval()) == (3, 2) + + # Make sure that the shape used to determine the expanded size doesn't + # depend on the old `RandomVariable`. 
+ rv_new_ancestors = set(ancestors((rv_new,))) + assert loc in rv_new_ancestors + assert rv not in rv_new_ancestors + + # Check that the old rng is not reused + assert rv_new.owner.inputs[0] is not rng + + rv_newer = change_dist_size(rv_new, new_size=(4,), expand=True) + assert rv_newer.ndim == 3 + assert tuple(rv_newer.shape.eval()) == (4, 3, 2) + + # Make sure we avoid introducing a `Cast` by converting the new size before + # constructing the new `RandomVariable` + rv = normal(0, 1) + new_size = np.array([4, 3], dtype="int32") + rv_newer = change_dist_size(rv, new_size=new_size, expand=False) + assert rv_newer.ndim == 2 + assert isinstance(rv_newer.owner.inputs[1], Constant) + assert tuple(rv_newer.shape.eval()) == (4, 3) + + rv = normal(0, 1) + new_size = at.as_tensor(np.array([4, 3], dtype="int32")) + rv_newer = change_dist_size(rv, new_size=new_size, expand=True) + assert rv_newer.ndim == 2 + assert tuple(rv_newer.shape.eval()) == (4, 3) + + rv = normal(0, 1) + new_size = at.as_tensor(2, dtype="int32") + rv_newer = change_dist_size(rv, new_size=new_size, expand=True) + assert rv_newer.ndim == 1 + assert tuple(rv_newer.shape.eval()) == (2,) + + +def test_change_rv_size_default_update(): + rng = aesara.shared(np.random.default_rng(0)) + x = normal(rng=rng) + + # Test that "traditional" default_update is translated to the new rng + rng.default_update = x.owner.outputs[0] + new_x = change_dist_size(x, new_size=(2,)) + new_rng = new_x.owner.inputs[0] + assert rng.default_update is x.owner.outputs[0] + assert new_rng.default_update is new_x.owner.outputs[0] + + # Test that "non-traditional" default_update raises UserWarning + next_rng = aesara.shared(np.random.default_rng(1)) + rng.default_update = next_rng + with pytest.warns(UserWarning, match="could not be replicated in resized variable"): + new_x = change_dist_size(x, new_size=(2,)) + new_rng = new_x.owner.inputs[0] + assert rng.default_update is next_rng + assert not hasattr(new_rng, "default_update") + + # Test that default_update is not set if there was none before + del rng.default_update + new_x = change_dist_size(x, new_size=(2,)) + new_rng = new_x.owner.inputs[0] + assert not hasattr(new_rng, "default_update") + + +def test_change_specify_shape_size_univariate(): + with aesara.config.change_flags(mode=Mode("py")): + s1, s2 = at.iscalars("s1", "s2") + x = at.random.normal(size=(s1, s2)) + x = at.specify_shape(x, (5, 3)) + x.eval({s1: 5, s2: 3}).shape == (5, 3) + + new_x = change_dist_size(x, (10, 5)) + # SpecifyShape is no longer present + assert not isinstance(new_x.owner.op, SpecifyShape) + assert new_x.eval().shape == (10, 5) + + new_x = change_dist_size(x, (10, 5), expand=True) + # SpecifyShape is still present + assert isinstance(new_x.owner.op, SpecifyShape) + new_x.eval({s1: 5, s2: 3}).shape == (10, 5, 5, 3) + with pytest.raises(AssertionError, match=re.escape("expected (None, None, 5, 3)")): + new_x.eval({s1: 6, s2: 3}) + + +def test_change_specify_shape_size_multivariate(): + with aesara.config.change_flags(mode=Mode("py")): + batch, supp = at.iscalars("batch", "supp") + x = at.random.multivariate_normal(at.zeros(supp), at.eye(supp), size=(batch,)) + x = at.specify_shape(x, (5, 3)) + x.eval({batch: 5, supp: 3}).shape == (5, 3) + + new_x = change_dist_size(x, (10, 5)) + # SpecifyShape is still present in the support dimension + assert isinstance(new_x.owner.op, SpecifyShape) + assert new_x.eval({supp: 3}).shape == (10, 5, 3) + with pytest.raises(AssertionError, match=re.escape("expected (None, None, 3)")): + 
new_x.eval({supp: 4}) + + new_x = change_dist_size(x, (10, 5), expand=True) + # SpecifyShape is still present in both support and batch dimension + assert isinstance(new_x.owner.op, SpecifyShape) + new_x.eval({batch: 5, supp: 3}).shape == (10, 5, 5, 3) + with pytest.raises(AssertionError, match=re.escape("expected (None, None, 5, 3)")): + new_x.eval({batch: 6, supp: 3}).shape == (10, 5, 5, 3) diff --git a/pymc/tests/distributions/test_timeseries.py b/pymc/tests/distributions/test_timeseries.py index 9fd366803a..545a45a20f 100644 --- a/pymc/tests/distributions/test_timeseries.py +++ b/pymc/tests/distributions/test_timeseries.py @@ -28,6 +28,7 @@ from pymc.distributions.discrete import DiracDelta from pymc.distributions.logprob import logp from pymc.distributions.multivariate import Dirichlet +from pymc.distributions.shape_utils import change_dist_size from pymc.distributions.timeseries import ( AR, GARCH11, @@ -38,8 +39,6 @@ from pymc.model import Model from pymc.sampling import draw, sample, sample_posterior_predictive from pymc.tests.distributions.util import ( - BaseTestDistributionRandom, - Nat, R, Rplus, Vector, @@ -57,11 +56,9 @@ (None, (10,), 0, 10, True), (None, (10,), 1, 9, True), (None, (10, 5), 0, 5, True), - (None, (10, ...), 0, None, True), (None, None, 0, None, True), (10, (10,), 0, 10, True), (10, (11,), 1, 10, True), - (10, (5, ...), 1, 10, True), (10, (5, 5), 0, 5, False), (10, (5, 10), 1, 9, False), ], @@ -76,8 +73,8 @@ def test_get_steps(info_source, steps, shape, step_shape_offset, expected_steps, dims = None coords = {} else: - dims = tuple(str(i) if shape is not ... else ... for i, shape in enumerate(shape)) - coords = {str(i): range(shape) for i, shape in enumerate(shape) if shape is not ...} + dims = tuple(str(i) for i, shape in enumerate(shape)) + coords = {str(i): range(shape) for i, shape in enumerate(shape)} with Model(coords=coords): inferred_steps = get_steps(steps=steps, dims=dims, step_shape_offset=step_shape_offset) @@ -85,9 +82,6 @@ def test_get_steps(info_source, steps, shape, step_shape_offset, expected_steps, if shape is None: observed = None else: - if ... 
in shape: - # There is no equivalent to implied dims in observed - return observed = np.zeros(shape) inferred_steps = get_steps( steps=steps, observed=observed, step_shape_offset=step_shape_offset @@ -105,51 +99,20 @@ def test_get_steps(info_source, steps, shape, step_shape_offset, expected_steps, class TestGaussianRandomWalk: - class TestGaussianRandomWalkRandom(BaseTestDistributionRandom): - # Override default size for test class - size = None - - pymc_dist = pm.GaussianRandomWalk - pymc_dist_params = {"mu": 1.0, "sigma": 2, "init_dist": pm.DiracDelta.dist(0), "steps": 4} - expected_rv_op_params = { - "mu": 1.0, - "sigma": 2, - "init_dist": pm.DiracDelta.dist(0), - "steps": 4, - } - - checks_to_run = [ - "check_pymc_params_match_rv_op", - "check_rv_inferred_size", - ] - - def check_rv_inferred_size(self): - steps = self.pymc_dist_params["steps"] - sizes_to_check = [None, (), 1, (1,)] - sizes_expected = [(steps + 1,), (steps + 1,), (1, steps + 1), (1, steps + 1)] - - for size, expected in zip(sizes_to_check, sizes_expected): - pymc_rv = self.pymc_dist.dist(**self.pymc_dist_params, size=size) - expected_symbolic = tuple(pymc_rv.shape.eval()) - assert expected_symbolic == expected - - def test_steps_scalar_check(self): - with pytest.raises(ValueError, match="steps must be an integer scalar"): - self.pymc_dist.dist(steps=[1], init_dist=pm.DiracDelta.dist(0)) - def test_logp(self): - def ref_logp(value, mu, sigma, steps): + def ref_logp(value, mu, sigma): return ( - st.norm.logpdf(value[0], 0, 100) + st.norm.logpdf(np.diff(value), mu, sigma).sum() + st.norm.logpdf(value[0], 0, 100) # default init_dist has a scale 100 + + st.norm.logpdf(np.diff(value), mu, sigma).sum() ) check_logp( pm.GaussianRandomWalk, Vector(R, 4), - {"mu": R, "sigma": Rplus, "steps": Nat}, + {"mu": R, "sigma": Rplus}, ref_logp, decimal=select_by_precision(float64=6, float32=1), - extra_args={"init_dist": Normal.dist(0, 100)}, + extra_args={"steps": 3, "init_dist": Normal.dist(0, 100)}, ) def test_gaussianrandomwalk_inference(self): @@ -177,34 +140,27 @@ def test_gaussian_random_walk_init_dist_shape(self, init): with warnings.catch_warnings(): warnings.filterwarnings("ignore", "Initial distribution not specified.*", UserWarning) grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init_dist=init) - assert tuple(grw.owner.inputs[-2].shape.eval()) == () + assert tuple(grw.owner.inputs[0].shape.eval()) == () grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init_dist=init, size=(5,)) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,) + assert tuple(grw.owner.inputs[0].shape.eval()) == (5,) grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init_dist=init, shape=2) - assert tuple(grw.owner.inputs[-2].shape.eval()) == () + assert tuple(grw.owner.inputs[0].shape.eval()) == () grw = pm.GaussianRandomWalk.dist(mu=0, sigma=1, steps=1, init_dist=init, shape=(5, 2)) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (5,) + assert tuple(grw.owner.inputs[0].shape.eval()) == (5,) grw = pm.GaussianRandomWalk.dist(mu=[0, 0], sigma=1, steps=1, init_dist=init) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,) + assert tuple(grw.owner.inputs[0].shape.eval()) == (2,) grw = pm.GaussianRandomWalk.dist(mu=0, sigma=[1, 1], steps=1, init_dist=init) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (2,) + assert tuple(grw.owner.inputs[0].shape.eval()) == (2,) grw = pm.GaussianRandomWalk.dist( mu=np.zeros((3, 1)), sigma=[1, 1], steps=1, init_dist=init ) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (3, 2) 
- - def test_shape_ellipsis(self): - grw = pm.GaussianRandomWalk.dist( - mu=0, sigma=1, steps=5, init_dist=pm.Normal.dist(), shape=(3, ...) - ) - assert tuple(grw.shape.eval()) == (3, 6) - assert tuple(grw.owner.inputs[-2].shape.eval()) == (3,) + assert tuple(grw.owner.inputs[0].shape.eval()) == (3, 2) def test_gaussianrandomwalk_broadcasted_by_init_dist(self): grw = pm.GaussianRandomWalk.dist( @@ -219,10 +175,9 @@ def test_inferred_steps_from_shape(self, shape): steps = x.owner.inputs[-1] assert steps.eval() == 5 - @pytest.mark.parametrize("shape", (None, (5, ...))) - def test_missing_steps(self, shape): + def test_missing_steps(self): with pytest.raises(ValueError, match="Must specify steps or shape parameter"): - GaussianRandomWalk.dist(shape=shape, init_dist=Normal.dist(0, 100)) + GaussianRandomWalk.dist(shape=None, init_dist=Normal.dist(0, 100)) def test_inconsistent_steps_and_shape(self): with pytest.raises(AssertionError, match="Steps do not match last shape dimension"): @@ -516,6 +471,15 @@ def test_init_deprecated_arg(self): with pytest.warns(FutureWarning, match="init parameter is now called init_dist"): pm.AR.dist(rho=[1, 2, 3], init=Normal.dist(), shape=(10,)) + def test_change_dist_size(self): + base_dist = pm.AR.dist(rho=[0.5, 0.5], init_dist=pm.Normal.dist(size=(2,)), shape=(3, 10)) + + new_dist = change_dist_size(base_dist, (4,)) + assert new_dist.eval().shape == (4, 10) + + new_dist = change_dist_size(base_dist, (4,), expand=True) + assert new_dist.eval().shape == (4, 3, 10) + @pytest.mark.xfail(reason="Timeseries not refactored", raises=NotImplementedError) def test_GARCH11(): diff --git a/pymc/tests/distributions/util.py b/pymc/tests/distributions/util.py index 2f25c77f9d..3f65350694 100644 --- a/pymc/tests/distributions/util.py +++ b/pymc/tests/distributions/util.py @@ -18,9 +18,10 @@ import pymc as pm -from pymc.aesaraf import change_rv_size, compile_pymc, floatX +from pymc.aesaraf import compile_pymc, floatX, intX from pymc.distributions import logcdf, logp from pymc.distributions.logprob import joint_logp +from pymc.distributions.shape_utils import change_dist_size from pymc.initial_point import make_initial_point_fn from pymc.tests.helpers import SeededTest, select_by_precision @@ -595,7 +596,6 @@ def pymc_random( fails=10, extra_args=None, model_args=None, - change_rv_size_fn=change_rv_size, ): if valuedomain is None: valuedomain = Domain([0], edges=(None, None)) @@ -604,7 +604,7 @@ def pymc_random( model_args = {} model, param_vars = build_model(dist, valuedomain, paramdomains, extra_args) - model_dist = change_rv_size_fn(model.named_vars["value"], size, expand=True) + model_dist = change_dist_size(model.named_vars["value"], size, expand=True) pymc_rand = compile_pymc([], model_dist) domains = paramdomains.copy() @@ -643,7 +643,7 @@ def pymc_random_discrete( valuedomain = Domain([0], edges=(None, None)) model, param_vars = build_model(dist, valuedomain, paramdomains) - model_dist = change_rv_size(model.named_vars["value"], size, expand=True) + model_dist = change_dist_size(model.named_vars["value"], size, expand=True) pymc_rand = compile_pymc([], model_dist) domains = paramdomains.copy() @@ -690,7 +690,7 @@ class BaseTestDistributionRandom(SeededTest): `check_pymc_draws_match_reference` 3. Shape variable inference is correct, via `check_rv_size` - Each desired test must be referenced by name in `tests_to_run`, when + Each desired test must be referenced by name in `checks_to_run`, when subclassing this distribution. Custom tests can be added to each class as well. 
See `TestFlat` for an example. @@ -706,7 +706,7 @@ class BaseTestDistributionRandom(SeededTest): ```python sizes_to_check = [None, (1), (2, 3)] sizes_expected = [(3,), (1, 3), (2, 3, 3)] - tests_to_run = ["check_rv_size"] + checks_to_run = ["check_rv_size"] ``` This is usually needed for Multivariate distributions. You can see an @@ -833,7 +833,7 @@ def check_rv_size(self): def validate_tests_list(self): assert len(self.checks_to_run) == len( set(self.checks_to_run) - ), "There are duplicates in the list of tests_to_run" + ), "There are duplicates in the list of checks_to_run" def seeded_scipy_distribution_builder(dist_name: str) -> Callable: diff --git a/pymc/tests/test_aesaraf.py b/pymc/tests/test_aesaraf.py index e3052b7458..bd618372b2 100644 --- a/pymc/tests/test_aesaraf.py +++ b/pymc/tests/test_aesaraf.py @@ -22,10 +22,9 @@ import pytest import scipy.sparse as sps -from aeppl.abstract import MeasurableVariable from aeppl.logprob import ParameterValueError from aesara.compile.builders import OpFromGraph -from aesara.graph.basic import Constant, Variable, ancestors, equal_computations +from aesara.graph.basic import Variable, equal_computations from aesara.tensor.random.basic import normal, uniform from aesara.tensor.random.op import RandomVariable from aesara.tensor.random.var import RandomStateSharedVariable @@ -35,16 +34,16 @@ import pymc as pm from pymc.aesaraf import ( - change_rv_size, compile_pymc, convert_observed_data, extract_obs_data, + replace_rng_nodes, reseed_rngs, rvs_to_value_vars, walk_model, ) from pymc.distributions.dist_math import check_parameters -from pymc.exceptions import ShapeError +from pymc.distributions.distribution import SymbolicRandomVariable from pymc.vartypes import int_types @@ -82,75 +81,6 @@ def test_pd_as_tensor_variable_multiindex() -> None: np.testing.assert_array_equal(x=at.as_tensor_variable(x=df).eval(), y=np_array) -def test_change_rv_size(): - loc = at.as_tensor_variable([1, 2]) - rv = normal(loc=loc) - assert rv.ndim == 1 - assert tuple(rv.shape.eval()) == (2,) - - with pytest.raises(ShapeError, match="must be ≤1-dimensional"): - change_rv_size(rv, new_size=[[2, 3]]) - with pytest.raises(ShapeError, match="must be ≤1-dimensional"): - change_rv_size(rv, new_size=at.as_tensor_variable([[2, 3], [4, 5]])) - - rv_new = change_rv_size(rv, new_size=(3,), expand=True) - assert rv_new.ndim == 2 - assert tuple(rv_new.shape.eval()) == (3, 2) - - # Make sure that the shape used to determine the expanded size doesn't - # depend on the old `RandomVariable`. 
- rv_new_ancestors = set(ancestors((rv_new,))) - assert loc in rv_new_ancestors - assert rv not in rv_new_ancestors - - rv_newer = change_rv_size(rv_new, new_size=(4,), expand=True) - assert rv_newer.ndim == 3 - assert tuple(rv_newer.shape.eval()) == (4, 3, 2) - - # Make sure we avoid introducing a `Cast` by converting the new size before - # constructing the new `RandomVariable` - rv = normal(0, 1) - new_size = np.array([4, 3], dtype="int32") - rv_newer = change_rv_size(rv, new_size=new_size, expand=False) - assert rv_newer.ndim == 2 - assert isinstance(rv_newer.owner.inputs[1], Constant) - assert tuple(rv_newer.shape.eval()) == (4, 3) - - rv = normal(0, 1) - new_size = at.as_tensor(np.array([4, 3], dtype="int32")) - rv_newer = change_rv_size(rv, new_size=new_size, expand=True) - assert rv_newer.ndim == 2 - assert tuple(rv_newer.shape.eval()) == (4, 3) - - rv = normal(0, 1) - new_size = at.as_tensor(2, dtype="int32") - rv_newer = change_rv_size(rv, new_size=new_size, expand=True) - assert rv_newer.ndim == 1 - assert tuple(rv_newer.shape.eval()) == (2,) - - -def test_change_rv_size_default_update(): - rng = aesara.shared(np.random.default_rng(0)) - x = normal(rng=rng) - - # Test that "traditional" default_update is updated - rng.default_update = x.owner.outputs[0] - new_x = change_rv_size(x, new_size=(2,)) - assert rng.default_update is not x.owner.outputs[0] - assert rng.default_update is new_x.owner.outputs[0] - - # Test that "non-traditional" default_update is left unchanged - next_rng = aesara.shared(np.random.default_rng(1)) - rng.default_update = next_rng - new_x = change_rv_size(x, new_size=(2,)) - assert rng.default_update is next_rng - - # Test that default_update is not set if there was none before - del rng.default_update - new_x = change_rv_size(x, new_size=(2,)) - assert not hasattr(rng, "default_update") - - class TestBroadcasting: def test_make_shared_replacements(self): """Check if pm.make_shared_replacements preserves broadcasting.""" @@ -528,20 +458,20 @@ def test_compile_pymc_updates_inputs(self): def test_compile_pymc_custom_update_op(self, _): """Test that custom MeasurableVariable Op updates are used by compile_pymc""" - class UnmeasurableOp(OpFromGraph): + class NonSymbolicRV(OpFromGraph): def update(self, node): return {node.inputs[0]: node.inputs[0] + 1} dummy_inputs = [at.scalar(), at.scalar()] dummy_outputs = [at.add(*dummy_inputs)] - dummy_x = UnmeasurableOp(dummy_inputs, dummy_outputs)(aesara.shared(1.0), 1.0) + dummy_x = NonSymbolicRV(dummy_inputs, dummy_outputs)(aesara.shared(1.0), 1.0) # Check that there are no updates at first fn = compile_pymc(inputs=[], outputs=dummy_x) assert fn() == fn() == 2.0 - # And they are enabled once the Op is registered as Measurable - MeasurableVariable.register(UnmeasurableOp) + # And they are enabled once the Op is registered as a SymbolicRV + SymbolicRandomVariable.register(NonSymbolicRV) fn = compile_pymc(inputs=[], outputs=dummy_x) assert fn() == 2.0 assert fn() == 3.0 @@ -573,6 +503,42 @@ def test_random_seed(self): assert x3_eval == x2_eval assert y3_eval == y2_eval + def test_multiple_updates_same_variable(self): + rng = aesara.shared(np.random.default_rng(), name="rng") + x = at.random.normal(rng=rng) + y = at.random.normal(rng=rng) + + assert compile_pymc([], [x]) + assert compile_pymc([], [y]) + msg = "Multiple update expressions found for the variable rng" + with pytest.raises(ValueError, match=msg): + compile_pymc([], [x, y]) + + +def test_replace_rng_nodes(): + rng = aesara.shared(np.random.default_rng()) + x = 
at.random.normal(rng=rng) + x_rng, *x_non_rng_inputs = x.owner.inputs + + cloned_x = x.owner.clone().default_output() + cloned_x_rng, *cloned_x_non_rng_inputs = cloned_x.owner.inputs + + # RNG inputs are the same across the two variables + assert x_rng is cloned_x_rng + + (new_x,) = replace_rng_nodes([cloned_x]) + new_x_rng, *new_x_non_rng_inputs = new_x.owner.inputs + + # Variables are still the same + assert new_x is cloned_x + + # RNG inputs are not the same as before + assert new_x_rng is not x_rng + + # All other inputs are the same as before + for non_rng_inputs, new_non_rng_inputs in zip(x_non_rng_inputs, new_x_non_rng_inputs): + assert non_rng_inputs is new_non_rng_inputs + def test_reseed_rngs(): # Reseed_rngs uses the `PCG64` bit_generator, which is currently the default diff --git a/pymc/tests/test_printing.py b/pymc/tests/test_printing.py index 2d082caa30..8ebaba564a 100644 --- a/pymc/tests/test_printing.py +++ b/pymc/tests/test_printing.py @@ -1,5 +1,6 @@ import numpy as np +from pymc import Bernoulli, Censored, Mixture from pymc.aesaraf import floatX from pymc.distributions import ( DirichletMultinomial, @@ -48,9 +49,14 @@ def setup_class(self): # ) nb2 = NegativeBinomial("nb_with_p_n", p=Uniform("nbp"), n=10) - # Symbolic distribution + # SymbolicRV zip = ZeroInflatedPoisson("zip", 0.5, 5) + # Nested SymbolicRV + comp_1 = ZeroInflatedPoisson.dist(0.5, 5) + comp_2 = Censored.dist(Bernoulli.dist(0.5), -1, 1) + nested_mix = Mixture("nested_mix", [0.5, 0.5], [comp_1, comp_2]) + # Expected value of outcome mu = Deterministic("mu", floatX(alpha + dot(X, b))) @@ -80,7 +86,7 @@ def setup_class(self): # add a potential as well pot = Potential("pot", mu**2) - self.distributions = [alpha, sigma, mu, b, Z, nb2, zip, Y_obs, pot] + self.distributions = [alpha, sigma, mu, b, Z, nb2, zip, nested_mix, Y_obs, pot] self.deterministics_or_potentials = [mu, pot] # tuples of (formatting, include_params self.formats = [("plain", True), ("plain", False), ("latex", True), ("latex", False)] @@ -92,7 +98,12 @@ def setup_class(self): r"beta ~ N(0, 10)", r"Z ~ N(f(), f())", r"nb_with_p_n ~ NB(10, nbp)", - r"zip ~ MarginalMixtureRV{inline=False}", + r"zip ~ MarginalMixture(f(), DiracDelta(0), Pois(5))", + ( + r"nested_mix ~ MarginalMixture(, " + r"MarginalMixture(f(), DiracDelta(0), Pois(5)), " + r"Censored(Bern(0.5), -1, 1))" + ), r"Y_obs ~ N(mu, sigma)", r"pot ~ Potential(f(beta, alpha))", ], @@ -103,7 +114,8 @@ def setup_class(self): r"beta ~ N", r"Z ~ N", r"nb_with_p_n ~ NB", - r"zip ~ MarginalMixtureRV{inline=False}", + r"zip ~ MarginalMixture", + r"nested_mix ~ MarginalMixture", r"Y_obs ~ N", r"pot ~ Potential", ], @@ -114,7 +126,12 @@ def setup_class(self): r"$\text{beta} \sim \operatorname{N}(0,~10)$", r"$\text{Z} \sim \operatorname{N}(f(),~f())$", r"$\text{nb_with_p_n} \sim \operatorname{NB}(10,~\text{nbp})$", - r"$\text{zip} \sim \text{MarginalMixtureRV{inline=False}}$", + r"$\text{zip} \sim \operatorname{MarginalMixture}(f(),~\text{\$\operatorname{DiracDelta}(0)\$},~\text{\$\operatorname{Pois}(5)\$})$", + ( + r"$\text{nested_mix} \sim \operatorname{MarginalMixture}(\text{}," + r"~\text{\$\operatorname{MarginalMixture}(f(),~\text{\\$\operatorname{DiracDelta}(0)\\$},~\text{\\$\operatorname{Pois}(5)\\$})\$}," + r"~\text{\$\operatorname{Censored}(\text{\\$\operatorname{Bern}(0.5)\\$},~-1,~1)\$})$" + ), r"$\text{Y_obs} \sim \operatorname{N}(\text{mu},~\text{sigma})$", r"$\text{pot} \sim \operatorname{Potential}(f(\text{beta},~\text{alpha}))$", ], @@ -125,7 +142,8 @@ def setup_class(self): 
r"$\text{beta} \sim \operatorname{N}$", r"$\text{Z} \sim \operatorname{N}$", r"$\text{nb_with_p_n} \sim \operatorname{NB}$", - r"$\text{zip} \sim \text{MarginalMixtureRV{inline=False}}$", + r"$\text{zip} \sim \operatorname{MarginalMixture}$", + r"$\text{nested_mix} \sim \operatorname{MarginalMixture}$", r"$\text{Y_obs} \sim \operatorname{N}$", r"$\text{pot} \sim \operatorname{Potential}$", ], diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py index f8ed92dcc5..89b3de7269 100644 --- a/pymc/tests/test_sampling.py +++ b/pymc/tests/test_sampling.py @@ -1697,7 +1697,7 @@ def test_volatile_parameters(self): assert {i.name for i in self.get_function_inputs(f)} == {"sigma"} assert {i.name for i in self.get_function_roots(f)} == {"mu", "sigma"} - def test_distributions_op_from_graph(self): + def test_mixture(self): with pm.Model() as model: w = pm.Dirichlet("w", a=np.ones(3), size=(5, 3)) @@ -1731,7 +1731,7 @@ def test_distributions_op_from_graph(self): assert {i.name for i in self.get_function_inputs(f)} == {"mu"} assert {i.name for i in self.get_function_roots(f)} == {"mu"} - def test_distributions_no_op_from_graph(self): + def test_censored(self): with pm.Model() as model: latent_mu = pm.Normal("latent_mu", mu=np.arange(3), sigma=1) mu = pm.Censored("mu", pm.Normal.dist(mu=latent_mu, sigma=1), lower=-1, upper=1) @@ -1788,6 +1788,25 @@ def test_lkj_cholesky_cov(self): assert {i.name for i in self.get_function_inputs(f)} == set() assert {i.name for i in self.get_function_roots(f)} == set() + def test_non_random_model_variable(self): + with pm.Model() as model: + # A user may register non-pure RandomVariables that can nevertheless be + # sampled, as long as a custom logprob is dispatched or Aeppl can infer + # its logprob (which is the case for `clip`) + y = at.clip(pm.Normal.dist(), -1, 1) + y = model.register_rv(y, name="y") + y_abs = pm.Deterministic("y_abs", at.abs(y)) + obs = pm.Normal("obs", y_abs, observed=np.zeros(10)) + + # y_abs should be resampled even if in the trace, because the source y is missing + f = compile_forward_sampling_function( + outputs=[obs], + vars_in_trace=[y_abs], + basic_rvs=model.basic_RVs, + ) + assert {i.name for i in self.get_function_inputs(f)} == set() + assert {i.name for i in self.get_function_roots(f)} == set() + def test_get_seeds_per_chain(): ret = _get_seeds_per_chain(None, chains=1)