From 37b8233d506f3a9ccff769703a69bcafcb77cf53 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Thu, 17 Nov 2022 09:07:32 +0100 Subject: [PATCH 1/6] Remove unused filter_rvs_to_jitter --- pymc/initial_point.py | 22 ++-------------------- pymc/sampling/mcmc.py | 9 ++------- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/pymc/initial_point.py b/pymc/initial_point.py index 2b5d28dd51..ffa6dd4ed9 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -52,29 +52,11 @@ def convert_str_to_rv_dict( return initvals -def filter_rvs_to_jitter(step) -> Set[TensorVariable]: - """Find the set of RVs for which the responsible step methods ask for - the addition of jitter to the initial point. - - Parameters - ---------- - step : BlockedStep or CompoundStep - One or many step methods that were assigned model variables. - - Returns - ------- - rvs_to_jitter : set - The random variables for which jitter should be added. - """ - # TODO: implement this - return set() - - def make_initial_point_fns_per_chain( *, model, overrides: Optional[Union[StartDict, Sequence[Optional[StartDict]]]], - jitter_rvs: Set[TensorVariable], + jitter_rvs: Optional[Set[TensorVariable]] = None, chains: int, ) -> List[Callable]: """Create an initial point function for each chain, as defined by initvals @@ -87,7 +69,7 @@ def make_initial_point_fns_per_chain( overrides : optional, list or dict Initial value strategy overrides that should take precedence over the defaults from the model. A sequence of None or dicts will be treated as chain-wise strategies and must have the same length as `seeds`. - jitter_rvs : set + jitter_rvs : set, optional Random variable tensors for which U(-1, 1) jitter shall be applied. (To the transformed space if applicable.) diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py index f111eb0d87..a0362497fe 100644 --- a/pymc/sampling/mcmc.py +++ b/pymc/sampling/mcmc.py @@ -37,12 +37,7 @@ from pymc.backends.base import BaseTrace, MultiTrace, _choose_chains from pymc.blocking import DictToArrayBijection from pymc.exceptions import SamplingError -from pymc.initial_point import ( - PointType, - StartDict, - filter_rvs_to_jitter, - make_initial_point_fns_per_chain, -) +from pymc.initial_point import PointType, StartDict, make_initial_point_fns_per_chain from pymc.model import Model, modelcontext from pymc.sampling.parallel import Draw, _cpu_count from pymc.sampling.population import _sample_population @@ -476,7 +471,7 @@ def sample( ipfns = make_initial_point_fns_per_chain( model=model, overrides=initvals, - jitter_rvs=filter_rvs_to_jitter(step), + jitter_rvs=set(), chains=chains, ) initial_points = [ipfn(seed) for ipfn, seed in zip(ipfns, random_seed_list)] From c2138f3027087604d15e3a904905d65adf33f230 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Thu, 17 Nov 2022 09:13:24 +0100 Subject: [PATCH 2/6] Rename data kwarg to observed in Model register_rv --- pymc/model.py | 23 +++++++++++------------ pymc/tests/test_model.py | 2 +- pymc/tests/test_model_graph.py | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pymc/model.py b/pymc/model.py index 37e77ffb9d..463d9b0296 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -1257,7 +1257,7 @@ def set_data( shared_object.set_value(values) def register_rv( - self, rv_var, name, data=None, total_size=None, dims=None, transform=UNSET, initval=None + self, rv_var, name, observed=None, total_size=None, dims=None, transform=UNSET, initval=None ): """Register an (un)observed random variable with the model. 
@@ -1266,9 +1266,8 @@ def register_rv( rv_var: TensorVariable name: str Intended name for the model variable. - data: array_like (optional) - If data is provided, the variable is observed. If None, - the variable is unobserved. + observed: array_like (optional) + Data values for observed variables. total_size: scalar upscales logp of variable with ``coef = total_size/var.shape[0]`` dims: tuple @@ -1295,31 +1294,31 @@ def register_rv( if dname not in self.dim_lengths: self.add_coord(dname, values=None, length=rv_var.shape[d]) - if data is None: + if observed is None: self.free_RVs.append(rv_var) self.create_value_var(rv_var, transform) self.add_random_variable(rv_var, dims) self.set_initval(rv_var, initval) else: if ( - isinstance(data, Variable) - and not isinstance(data, (GenTensorVariable, Minibatch)) - and data.owner is not None + isinstance(observed, Variable) + and not isinstance(observed, (GenTensorVariable, Minibatch)) + and observed.owner is not None # The only Aesara operation we allow on observed data is type casting # Although we could allow for any graph that does not depend on other RVs and not ( - isinstance(data.owner.op, Elemwise) - and isinstance(data.owner.op.scalar_op, Cast) + isinstance(observed.owner.op, Elemwise) + and isinstance(observed.owner.op.scalar_op, Cast) ) ): raise TypeError( "Variables that depend on other nodes cannot be used for observed data." - f"The data variable was: {data}" + f"The data variable was: {observed}" ) # `rv_var` is potentially changed by `make_obs_var`, # for example into a new graph for imputation of missing data. - rv_var = self.make_obs_var(rv_var, data, dims, transform) + rv_var = self.make_obs_var(rv_var, observed, dims, transform) return rv_var diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py index 68f99d1e8a..0d22729e75 100644 --- a/pymc/tests/test_model.py +++ b/pymc/tests/test_model.py @@ -1445,7 +1445,7 @@ def test_tag_future_warning_model(): assert y is not x assert y.tag is not x.tag assert isinstance(y.tag, _FutureWarningValidatingScratchpad) - y = model.register_rv(y, name="y", data=5) + y = model.register_rv(y, name="y", observed=5) assert isinstance(y.tag, _FutureWarningValidatingScratchpad) # Test expected warnings diff --git a/pymc/tests/test_model_graph.py b/pymc/tests/test_model_graph.py index 742281dc29..b19c3b7f30 100644 --- a/pymc/tests/test_model_graph.py +++ b/pymc/tests/test_model_graph.py @@ -247,7 +247,7 @@ def model_non_random_variable_rvs(): z_raw = pm.Normal.dist(y, shape=(5,)) z = pm.math.clip(z_raw, -1, 1) - model.register_rv(z, name="z", data=[0] * 5) + model.register_rv(z, name="z", observed=[0] * 5) compute_graph = { "mu": set(), From 088fa9dffa437c032c656881eb9bdba5fe0f6735 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Thu, 17 Nov 2022 11:27:46 +0100 Subject: [PATCH 3/6] Do not add transformed value names to `named_vars` This also disables prior_predictive sampling of transformed variables --- pymc/model.py | 12 +++--- pymc/sampling/forward.py | 21 ++-------- pymc/smc/kernels.py | 27 ++++++++----- pymc/tests/distributions/test_continuous.py | 1 - pymc/tests/sampling/test_forward.py | 44 ++------------------- pymc/tests/test_model.py | 5 +-- 6 files changed, 33 insertions(+), 77 deletions(-) diff --git a/pymc/model.py b/pymc/model.py index 463d9b0296..d3a6086da5 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -1480,7 +1480,6 @@ def create_value_var( value_var.tag.test_value = transform.forward( value_var, *rv_var.owner.inputs ).tag.test_value - self.named_vars[value_var.name] = 
value_var self.rvs_to_transforms[rv_var] = transform self.rvs_to_values[rv_var] = value_var self.values_to_rvs[value_var] = rv_var @@ -1704,14 +1703,17 @@ def check_start_vals(self, start): None """ start_points = [start] if isinstance(start, dict) else start + + value_names_to_dtypes = {value.name: value.dtype for value in self.value_vars} + value_names_set = set(value_names_to_dtypes.keys()) for elem in start_points: for k, v in elem.items(): - elem[k] = np.asarray(v, dtype=self[k].dtype) + elem[k] = np.asarray(v, dtype=value_names_to_dtypes[k]) - if not set(elem.keys()).issubset(self.named_vars.keys()): - extra_keys = ", ".join(set(elem.keys()) - set(self.named_vars.keys())) - valid_keys = ", ".join(self.named_vars.keys()) + if not set(elem.keys()).issubset(value_names_set): + extra_keys = ", ".join(set(elem.keys()) - value_names_set) + valid_keys = ", ".join(value_names_set) raise KeyError( "Some start parameters do not appear in the model!\n" f"Valid keys are: {valid_keys}, but {extra_keys} was supplied" diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index ed91b77f3f..7f758ead47 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -343,7 +343,7 @@ def sample_prior_predictive( var_names : Iterable[str] A list of names of variables for which to compute the prior predictive samples. Defaults to both observed and unobserved RVs. Transformed values - are not included unless explicitly defined in var_names. + are not allowed. random_seed : int, RandomState or Generator, optional Seed for the random number generator. return_inferencedata : bool @@ -382,23 +382,10 @@ def sample_prior_predictive( names = sorted(get_default_varnames(vars_, include_transformed=False)) vars_to_sample = [model[name] for name in names] - # Any variables from var_names that are missing must be transformed variables. - # Misspelled variables would have raised a KeyError above. + # Any variables from var_names still missing are assumed to be transformed variables. 
missing_names = vars_.difference(names) - for name in sorted(missing_names): - transformed_value_var = model[name] - rv_var = model.values_to_rvs[transformed_value_var] - transform = model.rvs_to_transforms[rv_var] - transformed_rv_var = transform.forward(rv_var, *rv_var.owner.inputs) - - names.append(name) - vars_to_sample.append(transformed_rv_var) - - # If the user asked for the transformed variable in var_names, but not the - # original RV, we add it manually here - if rv_var.name not in names: - names.append(rv_var.name) - vars_to_sample.append(rv_var) + if missing_names: + raise ValueError(f"Unrecognized var_names: {missing_names}") if random_seed is not None: (random_seed,) = _get_seeds_per_chain(random_seed, 1) diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 666581e15e..04131a0ddc 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -33,8 +33,9 @@ ) from pymc.backends.ndarray import NDArray from pymc.blocking import DictToArrayBijection +from pymc.initial_point import make_initial_point_expression from pymc.model import Point, modelcontext -from pymc.sampling.forward import sample_prior_predictive +from pymc.sampling.forward import draw from pymc.step_methods.metropolis import MultivariateNormalProposal from pymc.vartypes import discrete_types @@ -182,13 +183,20 @@ def initialize_population(self) -> Dict[str, np.ndarray]: "ignore", category=UserWarning, message="The effect of Potentials" ) - result = sample_prior_predictive( - self.draws, - var_names=[v.name for v in self.model.unobserved_value_vars], - model=self.model, - return_inferencedata=False, + model = self.model + prior_expression = make_initial_point_expression( + free_rvs=model.free_RVs, + rvs_to_transforms=model.rvs_to_transforms, + initval_strategies={}, + default_strategy="prior", + return_transformed=True, ) - return cast(Dict[str, np.ndarray], result) + prior_values = draw(prior_expression, draws=self.draws, random_seed=self.rng) + + names = [model.rvs_to_values[rv].name for rv in model.free_RVs] + dict_prior = {k: np.stack(v) for k, v in zip(names, prior_values)} + + return cast(Dict[str, np.ndarray], dict_prior) def _initialize_kernel(self): """Create variables and logp function necessary to run kernel @@ -325,12 +333,11 @@ def _posterior_to_trace(self, chain=0) -> NDArray: for i in range(lenght_pos): value = [] size = 0 - for varname in varnames: - shape, new_size = self.var_info[varname] + for var in self.variables: + shape, new_size = self.var_info[var.name] var_samples = self.tempered_posterior[i][size : size + new_size] # Round discrete variable samples. 
The rounded values were the ones # actually used in the logp evaluations (see logp_forw) - var = self.model[varname] if var.dtype in discrete_types: var_samples = np.round(var_samples).astype(var.dtype) value.append(var_samples.reshape(shape)) diff --git a/pymc/tests/distributions/test_continuous.py b/pymc/tests/distributions/test_continuous.py index 952b6087dd..1e896a7658 100644 --- a/pymc/tests/distributions/test_continuous.py +++ b/pymc/tests/distributions/test_continuous.py @@ -70,7 +70,6 @@ def random_polyagamma(*args, **kwargs): class TestBoundedContinuous: def get_dist_params_and_interval_bounds(self, model, rv_name): - interval_rv = model.named_vars[f"{rv_name}_interval__"] rv = model.named_vars[rv_name] dist_params = rv.owner.inputs lower_interval, upper_interval = model.rvs_to_transforms[rv].args_fn(*rv.owner.inputs) diff --git a/pymc/tests/sampling/test_forward.py b/pymc/tests/sampling/test_forward.py index 28a33c418d..447775bc01 100644 --- a/pymc/tests/sampling/test_forward.py +++ b/pymc/tests/sampling/test_forward.py @@ -1175,51 +1175,13 @@ def test_potentials_warning(self): with pytest.warns(UserWarning, match=warning_msg): pm.sample_prior_predictive(samples=5) - def test_transformed_vars(self): - # Test that prior predictive returns transformation of RVs when these are - # passed explicitly in `var_names` - - def ub_interval_forward(x, ub): - # Interval transform assuming lower bound is zero - return np.log(x - 0) - np.log(ub - x) - + def test_transformed_vars_not_supported(self): with pm.Model() as model: ub = pm.HalfNormal("ub", 10) x = pm.Uniform("x", 0, ub) - prior = pm.sample_prior_predictive( - var_names=["ub", "ub_log__", "x", "x_interval__"], - samples=10, - random_seed=123, - ) - - # Check values are correct - assert np.allclose(prior.prior["ub_log__"].data, np.log(prior.prior["ub"].data)) - assert np.allclose( - prior.prior["x_interval__"].data, - ub_interval_forward(prior.prior["x"].data, prior.prior["ub"].data), - ) - - # Check that it works when the original RVs are not mentioned in var_names - with pm.Model() as model_transformed_only: - ub = pm.HalfNormal("ub", 10) - x = pm.Uniform("x", 0, ub) - - prior_transformed_only = pm.sample_prior_predictive( - var_names=["ub_log__", "x_interval__"], - samples=10, - random_seed=123, - ) - assert ( - "ub" not in prior_transformed_only.prior.data_vars - and "x" not in prior_transformed_only.prior.data_vars - ) - assert np.allclose( - prior.prior["ub_log__"].data, prior_transformed_only.prior["ub_log__"].data - ) - assert np.allclose( - prior.prior["x_interval__"], prior_transformed_only.prior["x_interval__"].data - ) + with pytest.raises(ValueError, match="Unrecognized var_names"): + pm.sample_prior_predictive(var_names=["ub", "ub_log__", "x", "x_interval__"]) def test_issue_4490(self): # Test that samples do not depend on var_name order or, more fundamentally, diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py index 0d22729e75..2a6c240e34 100644 --- a/pymc/tests/test_model.py +++ b/pymc/tests/test_model.py @@ -1206,9 +1206,8 @@ def test_interval_missing_observations(self): with pytest.warns(ImputationWarning): theta2 = pm.Normal("theta2", mu=theta1, observed=obs2, rng=rng) - assert "theta1_observed" in model.named_vars - assert "theta1_missing_interval__" in model.named_vars - assert model.rvs_to_transforms[model.named_vars["theta1_observed"]] is None + assert isinstance(model.rvs_to_transforms[model["theta1_missing"]], IntervalTransform) + assert model.rvs_to_transforms[model["theta1_observed"]] is None 
prior_trace = pm.sample_prior_predictive(return_inferencedata=False) From dc41794a6ed8a81c6e73babf9c5fddf62604b403 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Thu, 17 Nov 2022 09:24:30 +0100 Subject: [PATCH 4/6] Rename add_random_variable to add_named_variable and _RV_dims to named_vars_to_dims --- pymc/backends/arviz.py | 11 +++---- pymc/data.py | 2 +- pymc/model.py | 32 ++++++++++++-------- pymc/model_graph.py | 4 +-- pymc/sampling/forward.py | 9 +++--- pymc/sampling/jax.py | 22 +++++--------- pymc/tests/distributions/test_shape_utils.py | 6 ++-- pymc/tests/sampling/test_jax.py | 2 +- pymc/tests/test_data.py | 6 ++-- pymc/tests/test_model.py | 4 +-- pymc/util.py | 2 +- 11 files changed, 50 insertions(+), 50 deletions(-) diff --git a/pymc/backends/arviz.py b/pymc/backends/arviz.py index 7e2dcda5f6..93a0a2a147 100644 --- a/pymc/backends/arviz.py +++ b/pymc/backends/arviz.py @@ -215,12 +215,11 @@ def __init__( } self.dims = {} if dims is None else dims - if hasattr(self.model, "RV_dims"): - model_dims = { - var_name: [dim for dim in dims if dim is not None] - for var_name, dims in self.model.RV_dims.items() - } - self.dims = {**model_dims, **self.dims} + model_dims = { + var_name: [dim for dim in dims if dim is not None] + for var_name, dims in self.model.named_vars_to_dims.items() + } + self.dims = {**model_dims, **self.dims} if sample_dims is None: sample_dims = ["chain", "draw"] self.sample_dims = sample_dims diff --git a/pymc/data.py b/pymc/data.py index 54042f8f75..4bdac41962 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -718,6 +718,6 @@ def Data( length=xshape[d], ) - model.add_random_variable(x, dims=dims) + model.add_named_variable(x, dims=dims) return x diff --git a/pymc/model.py b/pymc/model.py index d3a6086da5..1aac036a6f 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -554,6 +554,7 @@ def __init__( if self.parent is not None: self.named_vars = treedict(parent=self.parent.named_vars) + self.named_vars_to_dims = treedict(parent=self.parent.named_vars_to_dims) self.values_to_rvs = treedict(parent=self.parent.values_to_rvs) self.rvs_to_values = treedict(parent=self.parent.rvs_to_values) self.rvs_to_transforms = treedict(parent=self.parent.rvs_to_transforms) @@ -564,10 +565,10 @@ def __init__( self.deterministics = treelist(parent=self.parent.deterministics) self.potentials = treelist(parent=self.parent.potentials) self._coords = self.parent._coords - self._RV_dims = treedict(parent=self.parent._RV_dims) self._dim_lengths = self.parent._dim_lengths else: self.named_vars = treedict() + self.named_vars_to_dims = treedict() self.values_to_rvs = treedict() self.rvs_to_values = treedict() self.rvs_to_transforms = treedict() @@ -578,7 +579,6 @@ def __init__( self.deterministics = treelist() self.potentials = treelist() self._coords = {} - self._RV_dims = treedict() self._dim_lengths = {} self.add_coords(coords) @@ -972,7 +972,11 @@ def RV_dims(self) -> Dict[str, Tuple[Union[str, None], ...]]: Entries in the tuples may be ``None``, if the RV dimension was not given a name. """ - return self._RV_dims + warnings.warn( + "Model.RV_dims is deprecated. 
Use Model.named_vars_to_dims instead.", + FutureWarning, + ) + return self.named_vars_to_dims @property def coords(self) -> Dict[str, Union[Tuple, None]]: @@ -1167,7 +1171,7 @@ def set_data( if isinstance(values, list): values = np.array(values) values = convert_observed_data(values) - dims = self.RV_dims.get(name, None) or () + dims = self.named_vars_to_dims.get(name, None) or () coords = coords or {} if values.ndim != shared_object.ndim: @@ -1297,7 +1301,7 @@ def register_rv( if observed is None: self.free_RVs.append(rv_var) self.create_value_var(rv_var, transform) - self.add_random_variable(rv_var, dims) + self.add_named_variable(rv_var, dims) self.set_initval(rv_var, initval) else: if ( @@ -1424,7 +1428,7 @@ def make_obs_var( observed_rv_var.tag.observations = nonmissing_data self.create_value_var(observed_rv_var, transform=None, value_var=nonmissing_data) - self.add_random_variable(observed_rv_var) + self.add_named_variable(observed_rv_var) self.observed_RVs.append(observed_rv_var) # Create deterministic that combines observed and missing @@ -1440,7 +1444,7 @@ data = at.as_tensor_variable(data, name=name) rv_var.tag.observations = data self.create_value_var(rv_var, transform=None, value_var=data) - self.add_random_variable(rv_var, dims) + self.add_named_variable(rv_var, dims) self.observed_RVs.append(rv_var) return rv_var @@ -1486,8 +1490,12 @@ def create_value_var( return value_var - def add_random_variable(self, var, dims: Optional[Tuple[Union[str, None], ...]] = None): - """Add a random variable to the named variables of the model.""" + def add_named_variable(self, var, dims: Optional[Tuple[Union[str, None], ...]] = None): + """Add a random graph variable to the named variables of the model. + + This can include several types of variables such as basic_RVs, Data, Deterministics, + and Potentials.
+ """ if self.named_vars.tree_contains(var.name): raise ValueError(f"Variable name {var.name} already exists.") @@ -1499,7 +1507,7 @@ def add_random_variable(self, var, dims: Optional[Tuple[Union[str, None], ...]] raise ValueError(f"Dimension {dim} is not specified in `coords`.") if any(var.name == dim for dim in dims): raise ValueError(f"Variable `{var.name}` has the same name as its dimension label.") - self._RV_dims[var.name] = dims + self.named_vars_to_dims[var.name] = dims self.named_vars[var.name] = var if not hasattr(self, self.name_of(var.name)): @@ -1967,7 +1975,7 @@ def Deterministic(name, var, model=None, dims=None, auto=False): model.auto_deterministics.append(var) else: model.deterministics.append(var) - model.add_random_variable(var, dims) + model.add_named_variable(var, dims) from pymc.printing import str_for_potential_or_deterministic @@ -1999,7 +2007,7 @@ def Potential(name, var, model=None): model = modelcontext(model) var.name = model.name_for(name) model.potentials.append(var) - model.add_random_variable(var) + model.add_named_variable(var) from pymc.printing import str_for_potential_or_deterministic diff --git a/pymc/model_graph.py b/pymc/model_graph.py index d44bec8842..3bc272a91f 100644 --- a/pymc/model_graph.py +++ b/pymc/model_graph.py @@ -200,10 +200,10 @@ def get_plates(self, var_names: Optional[Iterable[VarName]] = None) -> Dict[str, for var_name in self.vars_to_plot(var_names): v = self.model[var_name] - if var_name in self.model.RV_dims: + if var_name in self.model.named_vars_to_dims: plate_label = " x ".join( f"{d} ({self._eval(self.model.dim_lengths[d])})" - for d in self.model.RV_dims[var_name] + for d in self.model.named_vars_to_dims[var_name] ) else: plate_label = " x ".join(map(str, self._eval(v.shape))) diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index 7f758ead47..ee5b2f304a 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -371,11 +371,10 @@ def sample_prior_predictive( ) if var_names is None: - prior_pred_vars = model.observed_RVs + model.auto_deterministics - prior_vars = ( - get_default_varnames(model.unobserved_RVs, include_transformed=True) + model.potentials - ) - vars_: Set[str] = {var.name for var in prior_vars + prior_pred_vars} + vars_: Set[str] = { + var.name + for var in model.basic_RVs + model.deterministics + model.auto_deterministics + } else: vars_ = set(var_names) diff --git a/pymc/sampling/jax.py b/pymc/sampling/jax.py index 040760fdd0..880e6c5cdb 100644 --- a/pymc/sampling/jax.py +++ b/pymc/sampling/jax.py @@ -343,13 +343,10 @@ def sample_blackjax_nuts( if cvals is not None } - if hasattr(model, "RV_dims"): - dims = { - var_name: [dim for dim in dims if dim is not None] - for var_name, dims in model.RV_dims.items() - } - else: - dims = {} + dims = { + var_name: [dim for dim in dims if dim is not None] + for var_name, dims in model.named_vars_to_dims.items() + } (random_seed,) = _get_seeds_per_chain(random_seed, 1) @@ -559,13 +556,10 @@ def sample_numpyro_nuts( if cvals is not None } - if hasattr(model, "RV_dims"): - dims = { - var_name: [dim for dim in dims if dim is not None] - for var_name, dims in model.RV_dims.items() - } - else: - dims = {} + dims = { + var_name: [dim for dim in dims if dim is not None] + for var_name, dims in model.named_vars_to_dims.items() + } (random_seed,) = _get_seeds_per_chain(random_seed, 1) diff --git a/pymc/tests/distributions/test_shape_utils.py b/pymc/tests/distributions/test_shape_utils.py index bff0c2e15d..b473227834 100644 --- 
a/pymc/tests/distributions/test_shape_utils.py +++ b/pymc/tests/distributions/test_shape_utils.py @@ -288,7 +288,7 @@ def test_simultaneous_shape_and_dims(self): # The shape and dims tuples correspond to each other. # Note: No checks are performed that implied shape (x), shape and dims actually match. y = pm.Normal("y", mu=x, shape=(2, 3), dims=("dshape", "ddata")) - assert pmodel.RV_dims["y"] == ("dshape", "ddata") + assert pmodel.named_vars_to_dims["y"] == ("dshape", "ddata") assert "dshape" in pmodel.dim_lengths assert y.eval().shape == (2, 3) @@ -301,7 +301,7 @@ def test_simultaneous_size_and_dims(self): # Size does not include support dims, so this test must use a dist with support dims. kwargs = dict(name="y", size=(2, 3), mu=at.ones((3, 4)), cov=at.eye(4)) y = pm.MvNormal(**kwargs, dims=("dsize", "ddata", "dsupport")) - assert pmodel.RV_dims["y"] == ("dsize", "ddata", "dsupport") + assert pmodel.named_vars_to_dims["y"] == ("dsize", "ddata", "dsupport") assert "dsize" in pmodel.dim_lengths assert y.eval().shape == (2, 3, 4) @@ -313,7 +313,7 @@ def test_simultaneous_dims_and_observed(self): # Note: No checks are performed that observed and dims actually match. y = pm.Normal("y", observed=[0, 0, 0], dims="ddata") - assert pmodel.RV_dims["y"] == ("ddata",) + assert pmodel.named_vars_to_dims["y"] == ("ddata",) assert y.eval().shape == (3,) def test_define_dims_on_the_fly_raises(self): diff --git a/pymc/tests/sampling/test_jax.py b/pymc/tests/sampling/test_jax.py index a6e97138b2..2f2cb553b8 100644 --- a/pymc/tests/sampling/test_jax.py +++ b/pymc/tests/sampling/test_jax.py @@ -235,7 +235,7 @@ def test_idata_kwargs( posterior = idata.get("posterior") assert posterior is not None - x_dim_expected = idata_kwargs.get("dims", model_test_idata_kwargs.RV_dims)["x"][0] + x_dim_expected = idata_kwargs.get("dims", model_test_idata_kwargs.named_vars_to_dims)["x"][0] assert x_dim_expected is not None assert posterior["x"].dims[-1] == x_dim_expected diff --git a/pymc/tests/test_data.py b/pymc/tests/test_data.py index 6983f3e342..52b18705ba 100644 --- a/pymc/tests/test_data.py +++ b/pymc/tests/test_data.py @@ -331,7 +331,7 @@ def test_explicit_coords(self): assert pmodel.dim_lengths["rows"].eval() == 5 assert "columns" in pmodel.coords assert pmodel.coords["columns"] == ("C1", "C2", "C3", "C4", "C5", "C6", "C7") - assert pmodel.RV_dims == {"observations": ("rows", "columns")} + assert pmodel.named_vars_to_dims == {"observations": ("rows", "columns")} assert "columns" in pmodel.dim_lengths assert pmodel.dim_lengths["columns"].eval() == 7 @@ -382,7 +382,7 @@ def test_implicit_coords_series(self): assert "date" in pmodel.coords assert len(pmodel.coords["date"]) == 22 - assert pmodel.RV_dims == {"sales": ("date",)} + assert pmodel.named_vars_to_dims == {"sales": ("date",)} def test_implicit_coords_dataframe(self): pd = pytest.importorskip("pandas") @@ -402,7 +402,7 @@ def test_implicit_coords_dataframe(self): assert "rows" in pmodel.coords assert "columns" in pmodel.coords - assert pmodel.RV_dims == {"observations": ("rows", "columns")} + assert pmodel.named_vars_to_dims == {"observations": ("rows", "columns")} def test_data_kwargs(self): strict_value = True diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py index 2a6c240e34..51e656b136 100644 --- a/pymc/tests/test_model.py +++ b/pymc/tests/test_model.py @@ -726,7 +726,7 @@ def test_nested_model_coords(): e = pm.Normal("e", a[None] + d[:, None], dims=("dim2", "dim1")) assert m1.coords is m2.coords assert m1.dim_lengths is m2.dim_lengths - 
assert set(m2.RV_dims) < set(m1.RV_dims) + assert set(m2.named_vars_to_dims) < set(m1.named_vars_to_dims) def test_shapeerror_from_set_data_dimensionality(): @@ -1378,7 +1378,7 @@ def test_dims(self): with pm.Model(coords={"observed": range(10)}) as model: with pytest.warns(ImputationWarning): x = pm.Normal("x", observed=data, dims=("observed",)) - assert model.RV_dims == {"x": ("observed",)} + assert model.named_vars_to_dims == {"x": ("observed",)} def test_error_non_random_variable(self): data = np.array([np.nan] * 3 + [0] * 7) diff --git a/pymc/util.py b/pymc/util.py index 010ab20baf..cbeceb34c5 100644 --- a/pymc/util.py +++ b/pymc/util.py @@ -120,7 +120,7 @@ def __init__(self, iterable=(), parent=None, **kwargs): update = withparent(dict.update) def tree_contains(self, item): - # needed for `add_random_variable` method + # needed for `add_named_variable` method if isinstance(self.parent, treedict): return dict.__contains__(self, item) or self.parent.tree_contains(item) elif isinstance(self.parent, dict): From 0b1f0ba2116f1432c977f4b2798b7424273d167a Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Thu, 17 Nov 2022 09:52:41 +0100 Subject: [PATCH 5/6] Rename Model initial_values to rvs_to_initial_values --- pymc/initial_point.py | 2 +- pymc/model.py | 11 +++++++---- pymc/tests/test_initial_point.py | 4 ++-- pymc/tests/test_model.py | 8 ++++---- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pymc/initial_point.py b/pymc/initial_point.py index ffa6dd4ed9..f2feb8185f 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -133,7 +133,7 @@ def make_initial_point_fn( sdict_overrides = convert_str_to_rv_dict(model, overrides or {}) initval_strats = { - **model.initial_values, + **model.rvs_to_initial_values, **sdict_overrides, } diff --git a/pymc/model.py b/pymc/model.py index 1aac036a6f..91813e81a9 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -550,8 +550,6 @@ def __init__( self.name = self._validate_name(name) self.check_bounds = check_bounds - self._initial_values: Dict[TensorVariable, Optional[Union[np.ndarray, Variable, str]]] = {} - if self.parent is not None: self.named_vars = treedict(parent=self.parent.named_vars) self.named_vars_to_dims = treedict(parent=self.parent.named_vars_to_dims) @@ -559,6 +557,7 @@ def __init__( self.rvs_to_values = treedict(parent=self.parent.rvs_to_values) self.rvs_to_transforms = treedict(parent=self.parent.rvs_to_transforms) self.rvs_to_total_sizes = treedict(parent=self.parent.rvs_to_total_sizes) + self.rvs_to_initial_values = treedict(parent=self.parent.rvs_to_initial_values) self.free_RVs = treelist(parent=self.parent.free_RVs) self.observed_RVs = treelist(parent=self.parent.observed_RVs) self.auto_deterministics = treelist(parent=self.parent.auto_deterministics) @@ -573,6 +572,7 @@ def __init__( self.rvs_to_values = treedict() self.rvs_to_transforms = treedict() self.rvs_to_total_sizes = treedict() + self.rvs_to_initial_values = treedict() self.free_RVs = treelist() self.observed_RVs = treelist() self.auto_deterministics = treelist() @@ -1128,7 +1128,10 @@ def initial_values(self) -> Dict[TensorVariable, Optional[Union[np.ndarray, Vari Keys are the random variables (as returned by e.g. ``pm.Uniform()``) and values are the numeric/symbolic initial values, strings denoting the strategy to get them, or None. """ - return self._initial_values + warnings.warn( + "Model.initial_values is deprecated. Use Model.rvs_to_initial_values instead." 
+ ) + return self.rvs_to_initial_values def set_initval(self, rv_var, initval): """Sets an initial value (strategy) for a random variable.""" @@ -1136,7 +1139,7 @@ def set_initval(self, rv_var, initval): # Convert scalars or array-like inputs to ndarrays initval = rv_var.type.filter(initval) - self.initial_values[rv_var] = initval + self.rvs_to_initial_values[rv_var] = initval def set_data( self, diff --git a/pymc/tests/test_initial_point.py b/pymc/tests/test_initial_point.py index 837df9b3aa..9c7a529aba 100644 --- a/pymc/tests/test_initial_point.py +++ b/pymc/tests/test_initial_point.py @@ -86,7 +86,7 @@ def test_dependent_initvals(self): assert ip["B2_interval__"] == 0 # Modify initval of L and re-evaluate - pmodel.initial_values[U] = 9.9 + pmodel.rvs_to_initial_values[U] = 9.9 ip = pmodel.initial_point(random_seed=0) assert ip["B1_interval__"] < 0 assert ip["B2_interval__"] == 0 @@ -108,7 +108,7 @@ def test_nested_initvals(self): ip_vals = list(make_initial_point_fn(model=pmodel, return_transformed=False)(0).values()) assert np.allclose(ip_vals, [1, 2, 4, 8, 16, 32], rtol=1e-3) - pmodel.initial_values[four] = 1 + pmodel.rvs_to_initial_values[four] = 1 ip_vals = list(make_initial_point_fn(model=pmodel, return_transformed=True)(0).values()) assert np.allclose(np.exp(ip_vals), [1, 2, 4, 1, 2, 4], rtol=1e-3) diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py index 51e656b136..9f767db1f7 100644 --- a/pymc/tests/test_model.py +++ b/pymc/tests/test_model.py @@ -586,11 +586,11 @@ def test_initial_point(): with model: y = pm.Normal("y", initval=y_initval) - assert a in model.initial_values - assert x in model.initial_values - assert model.initial_values[b] == b_initval + assert a in model.rvs_to_initial_values + assert x in model.rvs_to_initial_values + assert model.rvs_to_initial_values[b] == b_initval assert model.initial_point(0)["b_interval__"] == b_initval_trans - assert model.initial_values[y] == y_initval + assert model.rvs_to_initial_values[y] == y_initval def test_point_logps(): From 60f15e11c3ad6ab4f6bd4c982a8249d3a10f9326 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Thu, 17 Nov 2022 10:58:26 +0100 Subject: [PATCH 6/6] Remove Model auto_deterministics This property was initially added just to handle deterministics created by automatic imputation, in order to ensure the combined tensor of missing and observed components showed up in prior and posterior predictive sampling. At the same time, it allowed hiding the deterministic during mcmc sampling, saving memory use for large datasets. This last benefit is lost for the sake of simplicity. If a user is concerned, they can manually split the observed and missing components of a dataset when defining their model. 
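For example, a minimal sketch of the manual alternative (toy data and the names "mu", "y_observed", and "y_missing" are illustrative only, not part of this change):

    import numpy as np
    import pymc as pm

    data = np.ma.masked_values([1.0, -1.0, 3.0], value=-1.0)

    with pm.Model():
        mu = pm.Normal("mu")
        # Observed entries enter as a regular observed RV, so no combined
        # observed+missing deterministic is created.
        pm.Normal("y_observed", mu=mu, observed=data.compressed())
        # Missing entries are sampled as ordinary free RVs.
        pm.Normal("y_missing", mu=mu, shape=int(data.mask.sum()))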
--- pymc/model.py | 12 ++---- pymc/sampling/forward.py | 28 +++++++++++--- pymc/tests/sampling/test_forward.py | 17 +++++++++ pymc/tests/test_model.py | 58 ++++++++++++++++++++++------- 4 files changed, 88 insertions(+), 27 deletions(-) diff --git a/pymc/model.py b/pymc/model.py index 91813e81a9..efb50b32bc 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -560,7 +560,6 @@ def __init__( self.rvs_to_initial_values = treedict(parent=self.parent.rvs_to_initial_values) self.free_RVs = treelist(parent=self.parent.free_RVs) self.observed_RVs = treelist(parent=self.parent.observed_RVs) - self.auto_deterministics = treelist(parent=self.parent.auto_deterministics) self.deterministics = treelist(parent=self.parent.deterministics) self.potentials = treelist(parent=self.parent.potentials) self._coords = self.parent._coords @@ -575,7 +574,6 @@ def __init__( self.rvs_to_initial_values = treedict() self.free_RVs = treelist() self.observed_RVs = treelist() - self.auto_deterministics = treelist() self.deterministics = treelist() self.potentials = treelist() self._coords = {} @@ -1435,10 +1433,11 @@ def make_obs_var( self.observed_RVs.append(observed_rv_var) # Create deterministic that combines observed and missing + # Note: This can greatly increase memory consumption during sampling for large datasets rv_var = at.zeros(data.shape) rv_var = at.set_subtensor(rv_var[mask.nonzero()], missing_rv_var) rv_var = at.set_subtensor(rv_var[antimask_idx], observed_rv_var) - rv_var = Deterministic(name, rv_var, self, dims, auto=True) + rv_var = Deterministic(name, rv_var, self, dims) else: if sps.issparse(data): @@ -1911,7 +1910,7 @@ def Point(*args, filter_model_vars=False, **kwargs) -> Dict[str, np.ndarray]: } -def Deterministic(name, var, model=None, dims=None, auto=False): +def Deterministic(name, var, model=None, dims=None): """Create a named deterministic variable. Deterministic nodes are only deterministic given all of their inputs, i.e.
@@ -1974,10 +1973,7 @@ def Deterministic(name, var, model=None, dims=None, auto=False): """ model = modelcontext(model) var = var.copy(model.name_for(name)) - if auto: - model.auto_deterministics.append(var) - else: - model.deterministics.append(var) + model.deterministics.append(var) model.add_named_variable(var, dims) from pymc.printing import str_for_potential_or_deterministic diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index ee5b2f304a..53847b6cad 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -35,7 +35,14 @@ import xarray from aesara import tensor as at -from aesara.graph.basic import Apply, Constant, Variable, general_toposort, walk +from aesara.graph.basic import ( + Apply, + Constant, + Variable, + ancestors, + general_toposort, + walk, +) from aesara.graph.fg import FunctionGraph from aesara.tensor.random.var import ( RandomGeneratorSharedVariable, @@ -324,6 +331,18 @@ def draw( return [np.stack(v) for v in drawn_values] +def observed_dependent_deterministics(model: Model): + """Find deterministics that depend directly on observed variables""" + deterministics = model.deterministics + observed_rvs = set(model.observed_RVs) + blockers = model.basic_RVs + return [ + deterministic + for deterministic in deterministics + if observed_rvs & set(ancestors([deterministic], blockers=blockers)) + ] + + def sample_prior_predictive( samples: int = 500, model: Optional[Model] = None, @@ -371,10 +390,7 @@ def sample_prior_predictive( ) if var_names is None: - vars_: Set[str] = { - var.name - for var in model.basic_RVs + model.deterministics + model.auto_deterministics - } + vars_: Set[str] = {var.name for var in model.basic_RVs + model.deterministics} else: vars_ = set(var_names) @@ -570,7 +586,7 @@ def sample_posterior_predictive( if var_names is not None: vars_ = [model[x] for x in var_names] else: - vars_ = model.observed_RVs + model.auto_deterministics + vars_ = model.observed_RVs + observed_dependent_deterministics(model) indices = np.arange(samples) if progressbar: diff --git a/pymc/tests/sampling/test_forward.py b/pymc/tests/sampling/test_forward.py index 447775bc01..59063c6326 100644 --- a/pymc/tests/sampling/test_forward.py +++ b/pymc/tests/sampling/test_forward.py @@ -38,6 +38,7 @@ from pymc.sampling.forward import ( compile_forward_sampling_function, get_vars_in_point_list, + observed_dependent_deterministics, ) from pymc.tests.helpers import SeededTest, fast_unstable_sampling_mode @@ -1621,3 +1622,19 @@ def test_get_vars_in_point_list(): trace = MultiTrace([strace]) vars_in_trace = get_vars_in_point_list(trace, modelB) assert set(vars_in_trace) == {a} + + +def test_observed_dependent_deterministics(): + with pm.Model() as m: + free = pm.Normal("free") + obs = pm.Normal("obs", observed=1) + + det_free = pm.Deterministic("det_free", free + 1) + det_free2 = pm.Deterministic("det_free2", det_free + 1) + + det_obs = pm.Deterministic("det_obs", obs + 1) + det_obs2 = pm.Deterministic("det_obs2", det_obs + 1) + + det_mixed = pm.Deterministic("det_mixed", free + obs) + + assert set(observed_dependent_deterministics(m)) == {det_obs, det_obs2, det_mixed} diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py index 9f767db1f7..e03cd4507b 100644 --- a/pymc/tests/test_model.py +++ b/pymc/tests/test_model.py @@ -1195,21 +1195,29 @@ def test_missing_dual_observations(self): trace = pm.sample(chains=1, tune=5, draws=50) def test_interval_missing_observations(self): + rng = np.random.default_rng(1198) + with pm.Model() as model: obs1 = 
np.ma.masked_values([1, 2, -1, 4, -1], value=-1) obs2 = np.ma.masked_values([-1, -1, 6, -1, 8], value=-1) - rng = aesara.shared(np.random.RandomState(2323), borrow=True) - with pytest.warns(ImputationWarning): - theta1 = pm.Uniform("theta1", 0, 5, observed=obs1, rng=rng) + with pytest.warns(ImputationWarning): + theta1 = pm.Uniform("theta1", 0, 5, observed=obs1) with pytest.warns(ImputationWarning): - theta2 = pm.Normal("theta2", mu=theta1, observed=obs2, rng=rng) + theta2 = pm.Normal("theta2", mu=theta1, observed=obs2) assert isinstance(model.rvs_to_transforms[model["theta1_missing"]], IntervalTransform) assert model.rvs_to_transforms[model["theta1_observed"]] is None - prior_trace = pm.sample_prior_predictive(return_inferencedata=False) + prior_trace = pm.sample_prior_predictive(random_seed=rng, return_inferencedata=False) + assert set(prior_trace.keys()) == { + "theta1", + "theta1_observed", + "theta1_missing", + "theta2", + "theta2_observed", + "theta2_missing", + } # Make sure the observed + missing combined deterministics have the # same shape as the original observations vectors @@ -1237,23 +1245,47 @@ == 0.0 ) - assert {"theta1", "theta2"} <= set(prior_trace.keys()) - trace = pm.sample( - chains=1, draws=50, compute_convergence_checks=False, return_inferencedata=False + trace = pm.sample( + chains=1, + draws=50, + compute_convergence_checks=False, + return_inferencedata=False, + random_seed=rng, ) + assert set(trace.varnames) == { + "theta1", + "theta1_missing", + "theta1_missing_interval__", + "theta2", + "theta2_missing", + } + # Make sure that the missing values are newly generated samples and that + # the observed and deterministic match assert np.all(0 < trace["theta1_missing"].mean(0)) assert np.all(0 < trace["theta2_missing"].mean(0)) - assert "theta1" not in trace.varnames - assert "theta2" not in trace.varnames + assert np.isclose(np.mean(trace["theta1"][:, obs1.mask] - trace["theta1_missing"]), 0) + assert np.isclose(np.mean(trace["theta2"][:, obs2.mask] - trace["theta2_missing"]), 0) - # Make sure that the observed values are newly generated samples and that - # the observed and deterministic matche - pp_idata = pm.sample_posterior_predictive(trace) + # Make sure that the observed values are unchanged + assert np.allclose(np.var(trace["theta1"][:, ~obs1.mask], 0), 0.0) + assert np.allclose(np.var(trace["theta2"][:, ~obs2.mask], 0), 0.0) + np.testing.assert_array_equal(trace["theta1"][0][~obs1.mask], obs1[~obs1.mask]) + np.testing.assert_array_equal(trace["theta2"][0][~obs2.mask], obs2[~obs2.mask]) + + pp_idata = pm.sample_posterior_predictive(trace, random_seed=rng) pp_trace = pp_idata.posterior_predictive.stack(sample=["chain", "draw"]).transpose( "sample", ... ) + assert set(pp_trace.keys()) == { + "theta1", + "theta1_observed", + "theta2", + "theta2_observed", + } + + # Make sure that the observed values are newly generated samples and that # the observed and deterministic match assert np.all(np.var(pp_trace["theta1"], 0) > 0.0) assert np.all(np.var(pp_trace["theta2"], 0) > 0.0) assert np.isclose(