From e351925d1d66c7404bddf8dd6e711033e9af0901 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Tue, 17 Jan 2023 23:11:57 -0800 Subject: [PATCH 01/36] add hsgp implementation --- pymc/gp/__init__.py | 1 + pymc/gp/cov.py | 309 +++++++++++++++++++++++++++++--------- pymc/gp/hsgp.py | 239 +++++++++++++++++++++++++++++ pymc/tests/gp/test_cov.py | 88 ++++++++++- pymc/tests/gp/test_gp.py | 83 ++++++++++ 5 files changed, 649 insertions(+), 71 deletions(-) create mode 100644 pymc/gp/hsgp.py diff --git a/pymc/gp/__init__.py b/pymc/gp/__init__.py index 9510f33de4..995a5f9b87 100644 --- a/pymc/gp/__init__.py +++ b/pymc/gp/__init__.py @@ -22,3 +22,4 @@ MarginalKron, MarginalSparse, ) +from pymc.gp.hsgp import HSGP diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 956bbc6808..c6ec2ad164 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -14,6 +14,7 @@ import warnings +from collections import Counter from functools import reduce from numbers import Number from operator import add, mul @@ -47,26 +48,24 @@ ] -class Covariance: - r""" - Base class for all kernels/covariance functions. +def _verify_scalar(value): + if ( + isinstance(value, pytensor.compile.SharedVariable) + and value.get_value().squeeze().shape == () + ): + return at.squeeze(value) + elif np.asarray(value).squeeze().shape == (): + return np.squeeze(value) + elif isinstance(value, Number): + return value - Parameters - ---------- - input_dim: integer - The number of input dimensions, or columns of X (or Xs) - the kernel will operate on. - active_dims: List of integers - Indicate which dimension or column of X the covariance - function operates on. - """ + raise ValueError("A scalar value is required.") - def __init__(self, input_dim, active_dims=None): - self.input_dim = input_dim - if active_dims is None: - self.active_dims = np.arange(input_dim) - else: - self.active_dims = np.asarray(active_dims, int) + +class BaseCovariance: + """ + Base class for kernels/covariance functions. + """ def __call__(self, X, Xs=None, diag=False): r""" @@ -89,27 +88,18 @@ def __call__(self, X, Xs=None, diag=False): def diag(self, X): raise NotImplementedError - def full(self, X, Xs): + def full(self, X, Xs=None): raise NotImplementedError - def _slice(self, X, Xs): - xdims = X.shape[-1] - if isinstance(xdims, Variable): - xdims = xdims.eval() - if self.input_dim != xdims: - warnings.warn( - f"Only {self.input_dim} column(s) out of {xdims} are" - " being used to compute the covariance function. If this" - " is not intended, increase 'input_dim' parameter to" - " the number of columns to use. Ignore otherwise.", - UserWarning, - ) - X = at.as_tensor_variable(X[:, self.active_dims]) - if Xs is not None: - Xs = at.as_tensor_variable(Xs[:, self.active_dims]) - return X, Xs - def __add__(self, other): + # If it's a scalar, cast as Constant covariance. This allows validation for power spectral + # density calc. 
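+        # For example, `pm.gp.cov.ExpQuad(1, ls=1.0) + 1.0` becomes
+        # `Add([ExpQuad(1, ls=1.0), Constant(c=1.0)])` (a sketch, not from this diff),
+        # so the scalar term can be recognized and validated if a power spectral
+        # density is later requested from the sum.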
+ if isinstance(other, Number): + try: + other = Constant(c=_verify_scalar(other)) + except ValueError: + pass + return Add([self, other]) def __mul__(self, other): @@ -122,19 +112,7 @@ def __rmul__(self, other): return self.__mul__(other) def __pow__(self, other): - if ( - isinstance(other, pytensor.compile.SharedVariable) - and other.get_value().squeeze().shape == () - ): - other = at.squeeze(other) - return Exponentiated(self, other) - elif isinstance(other, Number): - return Exponentiated(self, other) - elif np.asarray(other).squeeze().shape == (): - other = np.squeeze(other) - return Exponentiated(self, other) - - raise ValueError("A covariance function can only be exponentiated by a scalar value") + return Exponentiated(self, _verify_scalar(other)) def __array_wrap__(self, result): """ @@ -151,41 +129,127 @@ def __array_wrap__(self, result): A = np.zeros((r, c)) for i in range(r): for j in range(c): - A[i, j] = result[i, j].factor_list[1] + r = result[i, j]._factor_list[1] + if isinstance(r, Constant): + # Counteract the elemwise Add edgecase + r = r.c + A[i, j] = r if isinstance(result[0][0], Add): - return result[0][0].factor_list[0] + A + return result[0][0]._factor_list[0] + A elif isinstance(result[0][0], Prod): - return result[0][0].factor_list[0] * A + return result[0][0]._factor_list[0] * A else: raise TypeError( - f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`." + f"Unknown Covariance combination type {result[0][0]}. " + "Known types are `Add` or `Prod`." ) +class Covariance(BaseCovariance): + """ + Base class for kernels/covariance functions with input_dim and active_dims, which excludes + kernels like `Constant` and `WhiteNoise`. + + Parameters + ---------- + input_dim: integer + The number of input dimensions, or columns of X (or Xs) + the kernel will operate on. + active_dims: List of integers + Indicate which dimension or column of X the covariance + function operates on. + """ + + def __init__(self, input_dim, active_dims=None): + self.input_dim = input_dim + if active_dims is None: + self.active_dims = np.arange(input_dim) + else: + self.active_dims = np.asarray(active_dims, int) + + if max(self.active_dims) > self.input_dim: + raise ValueError("Values in `active_dims` can't be larger than `input_dim`.") + + @property + def D(self): + """The dimensionality of the input, as taken from the + `active_dims`. + """ + # Evaluate lazily in-case this changes. + return len(self.active_dims) + + def _slice(self, X, Xs=None): + xdims = X.shape[-1] + if isinstance(xdims, Variable): + xdims = xdims.eval() + if self.input_dim != xdims: + warnings.warn( + f"Only {self.input_dim} column(s) out of {xdims} are" + " being used to compute the covariance function. If this" + " is not intended, increase 'input_dim' parameter to" + " the number of columns to use. 
Ignore otherwise.", + UserWarning, + ) + X = at.as_tensor_variable(X[:, self.active_dims]) + if Xs is not None: + Xs = at.as_tensor_variable(Xs[:, self.active_dims]) + return X, Xs + + class Combination(Covariance): def __init__(self, factor_list): - input_dim = max( - factor.input_dim for factor in factor_list if isinstance(factor, Covariance) + """Use constituent factors to get input_dim and active_dims for the Combination covariance.""" + + # Check if all input_dim are the same in factor_list + input_dims = {factor.input_dim for factor in factor_list if isinstance(factor, Covariance)} + + if len(input_dims) != 1: + raise ValueError("All covariances must have the same `input_dim`.") + input_dim = input_dims.pop() + + # Union all active_dims sets in factor_list for the combination covariance + active_dims = np.sort( + np.asarray( + list( + set.union( + *[ + set(factor.active_dims) + for factor in factor_list + if isinstance(factor, Covariance) + ] + ) + ), + dtype=int, + ) ) - super().__init__(input_dim=input_dim) - self.factor_list = [] + + super().__init__(input_dim=input_dim, active_dims=active_dims) + + # Set up combination kernel, flatten out factor_list so that + self._factor_list = [] for factor in factor_list: if isinstance(factor, self.__class__): - self.factor_list.extend(factor.factor_list) + self._factor_list.extend(factor._factor_list) else: - self.factor_list.append(factor) + self._factor_list.append(factor) - def merge_factors(self, X, Xs=None, diag=False): + def _merge_factors_cov(self, X, Xs=None, diag=False): + """Called to evaluate either all the sums or all the + products of kernels that are possible to evaluate. + """ factor_list = [] - for factor in self.factor_list: + for factor in self._factor_list: + # make sure diag=True is handled properly - if isinstance(factor, Covariance): + if isinstance(factor, BaseCovariance): factor_list.append(factor(X, Xs, diag)) + elif isinstance(factor, np.ndarray): if np.ndim(factor) == 2 and diag: factor_list.append(np.diag(factor)) else: factor_list.append(factor) + elif isinstance( factor, ( @@ -194,23 +258,79 @@ def merge_factors(self, X, Xs=None, diag=False): TensorSharedVariable, ), ): + if factor.ndim == 2 and diag: factor_list.append(at.diag(factor)) else: factor_list.append(factor) + else: factor_list.append(factor) + + return factor_list + + def _merge_factors_psd(self, omega): + """Called to evaluatate spectral densities of combination kernels when possible. Implements + a more restricted set of rules than `_merge_factors_cov` -- just additivity of stationary + covariances with defined power spectral densities and multiplication by scalars. Also, the + active_dims for all covariances in the sum must be the same. + """ + factor_list = [] + for factor in self._factor_list: + if isinstance(factor, Covariance): + + # Allow merging covariances for psd only if active_dims are the same + if set(self.active_dims) != set(factor.active_dims): + raise ValueError( + "For power spectral density calculations `active_dims` must be the same " + "for all covariances in the sum." + ) + + # If it's a covariance try to calculate the psd + try: + factor_list.append(factor.psd(omega)) + + except (AttributeError, NotImplementedError) as e: + + if isinstance(factor, Stationary): + raise NotImplementedError( + f"No power spectral density method has been implemented for {factor}." + ) from e + + else: + raise ValueError( + "Power spectral densities, `.psd(omega)`, can only be calculated for " + f"`Stationary` covariance functions. 
{factor} is non-stationary." + ) from e + + else: + # Otherwise defer the reduction to later + factor_list.append(factor) + return factor_list class Add(Combination): def __call__(self, X, Xs=None, diag=False): - return reduce(add, self.merge_factors(X, Xs, diag)) + return reduce(add, self._merge_factors_cov(X, Xs, diag)) + + def psd(self, omega): + return reduce(add, self._merge_factors_psd(omega)) class Prod(Combination): def __call__(self, X, Xs=None, diag=False): - return reduce(mul, self.merge_factors(X, Xs, diag)) + return reduce(mul, self._merge_factors_cov(X, Xs, diag)) + + def psd(self, omega): + check = Counter([isinstance(factor, Covariance) for factor in self._factor_list]) + if check.get(True) >= 2: + raise NotImplementedError( + "The power spectral density of products of covariance " + "functions is not implemented." + ) + + return reduce(mul, self._merge_factors_psd(omega)) class Exponentiated(Covariance): @@ -243,7 +363,7 @@ def __init__(self, factor_list): self.input_dims = [factor.input_dim for factor in factor_list] input_dim = sum(self.input_dims) super().__init__(input_dim=input_dim) - self.factor_list = factor_list + self._factor_list = factor_list def _split(self, X, Xs): indices = np.cumsum(self.input_dims) @@ -256,11 +376,11 @@ def _split(self, X, Xs): def __call__(self, X, Xs=None, diag=False): X_split, Xs_split = self._split(X, Xs) - covs = [cov(x, xs, diag) for cov, x, xs in zip(self.factor_list, X_split, Xs_split)] + covs = [cov(x, xs, diag) for cov, x, xs in zip(self._factor_list, X_split, Xs_split)] return reduce(mul, covs) -class Constant(Covariance): +class Constant(BaseCovariance): r""" Constant valued covariance function. @@ -270,7 +390,6 @@ class Constant(Covariance): """ def __init__(self, c): - super().__init__(1, None) self.c = c def diag(self, X): @@ -293,7 +412,6 @@ class WhiteNoise(Covariance): """ def __init__(self, sigma): - super().__init__(1, None) self.sigma = sigma def diag(self, X): @@ -408,6 +526,9 @@ def diag(self, X): def full(self, X, Xs=None): raise NotImplementedError + def psd(self, omega): + raise NotImplementedError + class Periodic(Stationary): r""" @@ -451,12 +572,26 @@ class ExpQuad(Stationary): .. math:: k(x, x') = \mathrm{exp}\left[ -\frac{(x - x')^2}{2 \ell^2} \right] + + The power spectral density (`psd`) is: + + .. math:: + + S(\boldsymbol\omega) = + (\sqrt(2 \pi)^D \prod_{i}^{D}\ell_i + \exp\left( -\frac{1}{2} \sum_{i}^{D}\ell_i^2 \omega_i^{2} \right) """ def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) return at.exp(-0.5 * self.square_dist(X, Xs)) + def psd(self, omega): + ls = at.ones(self.D) * self.ls + c = at.power(at.sqrt(2.0 * np.pi), self.D) + exp = at.exp(-0.5 * at.dot(at.square(omega), at.square(ls))) + return c * at.prod(ls) * exp + class RatQuad(Stationary): r""" @@ -488,6 +623,16 @@ class Matern52(Stationary): k(x, x') = \left(1 + \frac{\sqrt{5(x - x')^2}}{\ell} + \frac{5(x-x')^2}{3\ell^2}\right) \mathrm{exp}\left[ - \frac{\sqrt{5(x - x')^2}}{\ell} \right] + + The power spectral density is: + + .. 
math::
+
+        S(\boldsymbol\omega) =
+           \frac{2^D \pi^{\frac{D}{2}} \Gamma(\frac{D+5}{2}) 5^{5/2}}
+           {\frac{3}{4}\sqrt{\pi}}
+           \prod_{i=1}^{D}\ell_{i}
+           \left(5 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+5}{2}}
     """

     def full(self, X, Xs=None):
@@ -495,6 +640,14 @@ def full(self, X, Xs=None):
         r = self.euclidean_dist(X, Xs)
         return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * at.square(r)) * at.exp(-1.0 * np.sqrt(5.0) * r)

+    def psd(self, omega):
+        ls = at.ones(self.D) * self.ls
+        D52 = (self.D + 5) / 2
+        num = at.power(2, self.D) * at.power(np.pi, self.D / 2) * at.gamma(D52) * at.power(5, 5 / 2)
+        den = 0.75 * at.sqrt(np.pi)
+        pow = at.power(5.0 + at.dot(at.square(omega), at.square(ls)), -1 * D52)
+        return (num / den) * at.prod(ls) * pow
+

 class Matern32(Stationary):
     r"""
@@ -504,6 +657,16 @@ class Matern32(Stationary):

     k(x, x') = \left(1 + \frac{\sqrt{3(x - x')^2}}{\ell}\right)
                \mathrm{exp}\left[ - \frac{\sqrt{3(x - x')^2}}{\ell} \right]
+
+    The power spectral density is:
+
+    .. math::
+
+        S(\boldsymbol\omega) =
+           \frac{2^D \pi^{D/2} \Gamma\left(\frac{D+3}{2}\right) 3^{3/2}}
+           {\frac{1}{2}\sqrt{\pi}}
+           \prod_{i=1}^{D}\ell_{i}
+           \left(3 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+3}{2}}
     """

     def full(self, X, Xs=None):
@@ -511,6 +674,14 @@ def full(self, X, Xs=None):
         r = self.euclidean_dist(X, Xs)
         return (1.0 + np.sqrt(3.0) * r) * at.exp(-np.sqrt(3.0) * r)

+    def psd(self, omega):
+        ls = at.ones(self.D) * self.ls
+        D32 = (self.D + 3) / 2
+        num = at.power(2, self.D) * at.power(np.pi, self.D / 2) * at.gamma(D32) * at.power(3, 3 / 2)
+        den = 0.5 * at.sqrt(np.pi)
+        pow = at.power(3.0 + at.dot(at.square(omega), at.square(ls)), -1 * D32)
+        return (num / den) * at.prod(ls) * pow
+

 class Matern12(Stationary):
     r"""
diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
new file mode 100644
index 0000000000..33e3278131
--- /dev/null
+++ b/pymc/gp/hsgp.py
@@ -0,0 +1,239 @@
+#   Copyright 2022 The PyMC Developers
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+import warnings
+
+from typing import Optional, Sequence
+
+import numpy as np
+import pytensor.tensor as at
+
+import pymc as pm
+
+from pymc.gp.cov import Covariance
+from pymc.gp.gp import Base
+from pymc.gp.mean import Mean, Zero
+
+
+class HSGP(Base):
+    R"""
+    Hilbert Space Gaussian process
+
+    The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process.  This
+    approximation is a linear model that uses a fixed set of basis vectors, whose coefficients are
+    random functions of a stationary covariance function's power spectral density.  Like
+    `gp.Latent`, it does not assume a Gaussian noise model and can be used with any likelihood or as
+    a component anywhere within a model.  Also like `gp.Latent`, it has `prior` and `conditional`
+    methods.  It additionally has an `approx_K` method which returns the approximate covariance
+    matrix.  It supports a limited subset of additive covariances.
+
+    For information on choosing appropriate `m`, `L`, and `c`, refer to Ruitort-Mayol et al. 
or to the
+    pymc examples documentation.
+
+    Parameters
+    ----------
+    m: list
+        The number of basis vectors to use for each active dimension (covariance parameter
+        `active_dims`).
+    L: list
+        The boundary of the space for each `active_dims`.  It is called the boundary condition.
+        Choose L such that the domain `[-L, L]` contains all points in the column of X given by the
+        `active_dims`.
+    c: float
+        The proportion extension factor.  Used to construct L from X.  Defined as `S = max|X|` such
+        that `X` is in `[-S, S]`.  `L` is then calculated as `c * S`.  One of `c` or `L` must be
+        provided.  Further information can be found in Ruitort-Mayol et al.
+    drop_first: bool
+        Default `False`.  Sometimes the first basis vector is quite "flat" and very similar to
+        the intercept term.  When there is an intercept in the model, ignoring the first basis
+        vector may improve sampling.
+    cov_func: instance of Covariance
+        The covariance function.  Must be provided as a keyword argument.
+    mean_func: None, instance of Mean
+        The mean function.  Defaults to zero.
+
+    Examples
+    --------
+    .. code:: python
+
+        # One hundred inputs, each with three dimensions.
+        X = np.random.randn(100, 3)
+
+        with pm.Model() as model:
+            # Specify the covariance function.  Three input dimensions, but we only want to use the
+            # last two.
+            cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2])
+
+            # Specify the HSGP.  Use 50 basis vectors across each active dimension, [1, 2] for a
+            # total of 50 * 50 = 2500.  The range of the data is inferred from X, and the boundary
+            # condition multiplier `c` uses 4 * half range.
+            gp = pm.gp.HSGP(m=[50, 50], c=4.0, cov_func=cov_func)
+
+            # Place a GP prior over the function f.
+            f = gp.prior("f", X=X)
+
+        ...
+
+        # After fitting or sampling, specify the distribution
+        # at new points with .conditional
+        Xnew = np.linspace(-1, 2, 50)[:, None]
+
+        with model:
+            fcond = gp.conditional("fcond", Xnew=Xnew)
+
+    References
+    ----------
+    -   Ruitort-Mayol, G., Anderson, M., Solin, A., and Vehtari, A. (2022).  Practical
+        Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming
+
+    -   Solin, A., Sarkka, S. (2019).  Hilbert Space Methods for Reduced-Rank Gaussian Process
+        Regression.
+    """
+
+    def __init__(
+        self,
+        m: Sequence[int],
+        L: Optional[Sequence[float]] = None,
+        c: float = 1.5,
+        drop_first=False,
+        *,
+        mean_func: Mean = Zero(),
+        cov_func: Covariance,
+    ):
+        arg_err_msg = (
+            "`m` and L, if provided, must be sequences, with one element per active "
+            "dimension of the kernel or covariance function."
+        )
+        if not isinstance(m, Sequence):
+            raise ValueError(arg_err_msg)
+        if len(m) != cov_func.D:
+            raise ValueError(arg_err_msg)
+        m = tuple(m)
+        if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.D):
+            raise ValueError(arg_err_msg)
+        elif L is not None:
+            L = tuple(L)
+
+        if L is None and c < 1.2:
+            warnings.warn(
+                "Most applications will require a `c >= 1.2` for accuracy at the boundaries of the "
+                "domain." 
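+                # A sketch of the heuristic: with `c = 1.5` and roughly centered X, the
+                # approximation domain [-L, L] extends 1.5 times past max(|X|); smaller
+                # values of `c` tend to degrade the approximation near the edges of X.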
+ ) + + self.drop_first = drop_first + self.m = m + self.L = L + self.c = c + self.D = cov_func.D + + super().__init__(mean_func=mean_func, cov_func=cov_func) + + def __add__(self, other): + raise NotImplementedError("Additive HSGPs aren't supported ") + + def _set_boundary(self, X): + """Make L from X and c if L is not passed in.""" + if self.L is None: + # Define new L based on c and X range + La = at.abs(at.min(X, axis=0)) + Lb = at.abs(at.max(X, axis=0)) + self.L = self.c * at.max(at.stack((La, Lb)), axis=0) + else: + self.L = at.as_tensor_variable(self.L) + + @staticmethod + def _eigendecomposition(X, L, m, D): + """Construct the eigenvalues and eigenfunctions of the Laplace operator.""" + m_star = at.prod(m) + S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(D)]) + S = np.vstack([s.flatten() for s in S]).T + eigvals = at.square((np.pi * S) / (2 * L)) + phi = at.ones((X.shape[0], m_star)) + for d in range(D): + c = 1.0 / np.sqrt(L[d]) + phi *= c * at.sin(at.sqrt(eigvals[:, d]) * (at.tile(X[:, d][:, None], m_star) + L[d])) + omega = at.sqrt(eigvals) + return omega, phi, m_star + + def approx_K(self, X): + """A helper function which gives the approximate kernel or covariance matrix K. This can be + helpful when trying to see how well an approximation may work. + """ + X, _ = self.cov_func._slice(X) + self._set_boundary(X) + omega, phi, _ = self._eigendecomposition(X, self.L, self.m, self.cov_func.D) + psd = self.cov_func.psd(omega) + return at.dot(phi * psd, at.transpose(phi)) + + def prior(self, name, X, dims=None): + R""" + Returns the (approximate) GP prior distribution evaluated over the input locations `X`. + + Parameters + ---------- + name: string + Name of the random variable + X: array-like + Function input values. + dims: None + Dimension name for the GP random variable. + """ + + X, _ = self.cov_func._slice(X) + self._set_boundary(X) + omega, phi, m_star = self._eigendecomposition(X, self.L, self.m, self.D) + psd = self.cov_func.psd(omega) + + if self.drop_first: + self.beta = pm.Normal(f"{name}_coeffs_", size=m_star - 1) + self.f = pm.Deterministic( + name, + self.mean_func(X) + at.squeeze(at.dot(phi[:, 1:], self.beta * psd[1:])), + dims=dims, + ) + else: + self.beta = pm.Normal(f"{name}_coeffs_", size=m_star) + self.f = pm.Deterministic( + name, + self.mean_func(X) + at.squeeze(at.dot(phi, self.beta * at.sqrt(psd))), + dims=dims, + ) + return self.f + + def _build_conditional(self, Xnew): + if getattr(self, "beta", None) is None: + raise ValueError( + "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." + ) + Xnew, _ = self.cov_func._slice(Xnew) + omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.D) + psd = self.cov_func.psd(omega) + return self.mean_func(Xnew) + at.squeeze(at.dot(phi, self.beta * psd)) + + def conditional(self, name, Xnew, dims=None): + R""" + Returns the (approximate) conditional distribution evaluated over new input locations + `Xnew`. + + Parameters + ---------- + name: string + Name of the random variable + Xnew: array-like + Function input values. + dims: None + Dimension name for the GP random variable. 
+ """ + fnew = self._build_conditional(Xnew) + return pm.Deterministic(name, fnew, dims=dims) diff --git a/pymc/tests/gp/test_cov.py b/pymc/tests/gp/test_cov.py index 68fc3a5ead..f16580288d 100644 --- a/pymc/tests/gp/test_cov.py +++ b/pymc/tests/gp/test_cov.py @@ -181,6 +181,61 @@ def test_inv_rightprod(self): cov = M + pm.gp.cov.ExpQuad(1, 1.0) +class TestCovPSD: + def test_covpsd_add(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov1 = 2 * pm.gp.cov.ExpQuad(1, 0.1) + cov2 = 5 * pm.gp.cov.ExpQuad(1, 1.0) + cov = cov1 + cov2 + psd1 = cov1.psd(omega[:, None]).eval() + psd2 = cov2.psd(omega[:, None]).eval() + psd = cov.psd(omega[:, None]).eval() + npt.assert_allclose(psd, psd1 + psd2) + + def test_copsd_multiply(self): + # This could be implemented via convolution + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov1 = 2 * pm.gp.cov.ExpQuad(1, ls=1) + cov2 = pm.gp.cov.ExpQuad(1, ls=1) + + with pytest.raises(NotImplementedError): + psd = (cov1 * cov2).psd(omega[:, None]).eval() + + def test_covpsd_nonstationary1(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov = 2 * pm.gp.cov.Linear(1, c=5) + + with pytest.raises(ValueError): + psd = cov.psd(omega[:, None]).eval() + + def test_covpsd_nonstationary2(self): + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov = 2 * pm.gp.cov.ExpQuad(1, ls=1) + 10.0 + + with pytest.raises(ValueError): + psd = cov.psd(omega[:, None]).eval() + + def test_covpsd_notimplemented(self): + class NewStationaryCov(pm.gp.cov.Stationary): + pass + + L = 10.0 + omega = np.pi * np.arange(1, 101) / (2 * L) + with pm.Model() as model: + cov = 2 * NewStationaryCov(1, ls=1) + + with pytest.raises(NotImplementedError): + psd = cov.psd(omega[:, None]).eval() + + class TestCovExponentiation: def test_symexp_cov(self): X = np.linspace(0, 1, 10)[:, None] @@ -228,7 +283,7 @@ def test_covexp_shared(self): def test_invalid_covexp(self): X = np.linspace(0, 1, 10)[:, None] - with pytest.raises(ValueError, match=r"can only be exponentiated by a scalar value"): + with pytest.raises(ValueError, match=r"scalar value is required"): with pm.Model() as model: a = np.array([[1.0, 2.0]]) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a @@ -262,7 +317,7 @@ def test_multiops(self): + pm.gp.cov.ExpQuad(1, 0.1) + pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(1, 0.1) ) - cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) + cov2 = pm.gp.cov.ExpQuad(2, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) K_true = kronecker(cov1(X1).eval(), cov2(X2).eval()).eval() K = cov(X).eval() @@ -373,6 +428,15 @@ def test_inv_lengthscale(self): Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + def test_psd(self): + # compare to simple 1d formula + X = np.linspace(0, 1, 10)[:, None] + omega = np.linspace(0, 2, 50) + ell = 2.0 + true_1d_psd = np.sqrt(2 * np.pi * np.square(ell)) * np.exp(-0.5 * np.square(ell * omega)) + test_1d_psd = pm.gp.cov.ExpQuad(1, ls=ell).psd(omega[:, None]).flatten().eval() + npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5) + class TestWhiteNoise: def test_1d(self): @@ -449,6 +513,16 @@ def test_1d(self): Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + def test_psd(self): + # compare to simple 1d formula + X = np.linspace(0, 1, 10)[:, None] + omega = np.linspace(0, 2, 50) + ell = 2.0 + lamda = np.sqrt(5) / ell + true_1d_psd = (16.0 / 3.0) * 
np.power(lamda, 5) * np.power(lamda**2 + omega**2, -3) + test_1d_psd = pm.gp.cov.Matern52(1, ls=ell).psd(omega[:, None]).flatten().eval() + npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5) + class TestMatern32: def test_1d(self): @@ -463,6 +537,16 @@ def test_1d(self): Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + def test_psd(self): + # compare to simple 1d formula + X = np.linspace(0, 1, 10)[:, None] + omega = np.linspace(0, 2, 50) + ell = 2.0 + lamda = np.sqrt(3) / ell + true_1d_psd = 4 * np.power(lamda, 3) * np.power(lamda**2 + omega**2, -2) + test_1d_psd = pm.gp.cov.Matern32(1, ls=ell).psd(omega[:, None]).flatten().eval() + npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5) + class TestMatern12: def test_1d(self): diff --git a/pymc/tests/gp/test_gp.py b/pymc/tests/gp/test_gp.py index 07d605eacd..1fe22a6971 100644 --- a/pymc/tests/gp/test_gp.py +++ b/pymc/tests/gp/test_gp.py @@ -534,3 +534,86 @@ def testMarginalKronRaises(self): gp2 = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) with pytest.raises(TypeError): gp1 + gp2 + + +class TestHSGP: + @pytest.fixture + def rng(self): + return np.random.RandomState(10) + + @pytest.fixture + def data(self, rng): + X = rng.randn(100, 1) + X1 = rng.randn(52, 1) + return X, X1 + + @pytest.fixture + def X(self, data): + return data[0] + + @pytest.fixture + def X1(self, data): + return data[1] + + @pytest.fixture + def model(self): + return pm.Model() + + @pytest.fixture + def cov_func(self): + return pm.gp.cov.ExpQuad(1, ls=0.1) + + @pytest.fixture + def gp(self, cov_func): + gp = pm.gp.HSGP(m=[500], c=4.0, cov_func=cov_func) + return gp + + def test_shapes(self, model, gp, X): + with model: + ka = gp.approx_K(X) + kf = gp.cov_func(X) + # NOTE: relative difference is still high + np.testing.assert_allclose(ka.eval(), kf.eval(), atol=1e-10) + + def test_prior(self, model, gp, X): + # TODO: improve mathematical side of tests + # So far I just check interfaces are the same for latent and HSGP + with model: + f1 = gp.prior("f1", X) + assert pm.draw(f1).shape == (X.shape[0],) + assert ~np.isnan(pm.draw(f1)).any() + + def test_conditional(self, model, gp, X, X1): + # TODO: improve mathematical side of tests + # So far I just check interfaces are the same for latent and HSGP + with model: + with pytest.raises(ValueError, match="Prior is not set"): + gp.conditional("f1", X1) + gp.prior("f1", X) + gp.conditional("f2", X1) + + def test_parametrization_m(self): + cov_func = pm.gp.cov.ExpQuad(1, ls=0.1) + with pytest.raises( + ValueError, match="must be sequences, with one element per active dimension" + ): + pm.gp.HSGP(m=500, c=4.0, cov_func=cov_func) + + with pytest.raises( + ValueError, match="must be sequences, with one element per active dimension" + ): + pm.gp.HSGP(m=[500, 12], c=4.0, cov_func=cov_func) + + def test_parametrization_L(self): + cov_func = pm.gp.cov.ExpQuad(1, ls=0.1) + with pytest.raises( + ValueError, match="must be sequences, with one element per active dimension" + ): + pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func) + + @pytest.mark.parametrize("drop_first", [True, False]) + def test_parametrization_drop_first(self, model, cov_func, X, drop_first): + with model: + gp = pm.gp.HSGP(m=[500], c=4.0, cov_func=cov_func, drop_first=drop_first) + gp.prior("f1", X) + assert model.f1_coeffs_.type.shape == (500 - drop_first,) From d244d61aee2f44bc9655a9443aad489096d8f126 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Wed, 18 Jan 2023 10:45:44 -0800 Subject: [PATCH 02/36] 
docstring improvements

---
 pymc/gp/hsgp.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index 33e3278131..50fd9013f3 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -30,13 +30,13 @@ class HSGP(Base):
     R"""
     Hilbert Space Gaussian process

-    The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process.  This
-    approximation is a linear model that uses a fixed set of basis vectors, whose coefficients are
-    random functions of a stationary covariance function's power spectral density.  Like
-    `gp.Latent`, it does not assume a Gaussian noise model and can be used with any likelihood or as
-    a component anywhere within a model.  Also like `gp.Latent`, it has `prior` and `conditional`
-    methods.  It additionally has an `approx_K` method which returns the approximate covariance
-    matrix.  It supports a limited subset of additive covariances.
+    The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process.  It is a
+    reduced rank GP approximation that uses a fixed set of basis vectors whose coefficients are
+    random functions of a stationary covariance function's power spectral density.  Its usage
+    is largely similar to `gp.Latent`.  Like `gp.Latent`, it does not assume a Gaussian noise model
+    and can be used with any likelihood, or as a component anywhere within a model.  Also like
+    `gp.Latent`, it has `prior` and `conditional` methods.  It supports a limited subset of
+    additive covariances.

     For information on choosing appropriate `m`, `L`, and `c`, refer to Ruitort-Mayol et al. or to the
     pymc examples documentation.
@@ -77,7 +77,7 @@ class HSGP(Base):

             # Specify the HSGP.  Use 50 basis vectors across each active dimension, [1, 2] for a
             # total of 50 * 50 = 2500.  The range of the data is inferred from X, and the boundary
-            # condition multiplier `c` uses 4 * half range.
+            # condition multiplier `c` uses 4 * half range of the data, `L`.
             gp = pm.gp.HSGP(m=[50, 50], c=4.0, cov_func=cov_func)

             # Place a GP prior over the function f.

From 6bd08d6cd9f4b46571395a0b373c1312600b6c6d Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Wed, 18 Jan 2023 11:16:18 -0800
Subject: [PATCH 03/36] fix precommit

---
 pymc/gp/hsgp.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index 50fd9013f3..dd077b646c 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -34,9 +34,9 @@ class HSGP(Base):
     reduced rank GP approximation that uses a fixed set of basis vectors whose coefficients are
     random functions of a stationary covariance function's power spectral density.  Its usage
     is largely similar to `gp.Latent`.  Like `gp.Latent`, it does not assume a Gaussian noise model
-    and can be used with any likelihood, or as a component anywhere within a model.  Also like
-    `gp.Latent`, it has `prior` and `conditional` methods.  It supports a limited subset of
-    additive covariances.
+    and can be used with any likelihood, or as a component anywhere within a model.  Also like
+    `gp.Latent`, it has `prior` and `conditional` methods.  It supports a limited subset of
+    additive covariances.

     For information on choosing appropriate `m`, `L`, and `c`, refer to Ruitort-Mayol et al. or to the
     pymc examples documentation. 
From dadcba162cc8dd9eed04ecfdcf65cff95b949e3c Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 23 Jan 2023 17:54:37 -0800 Subject: [PATCH 04/36] add some type hints Co-authored-by: Michael Osthege --- pymc/gp/cov.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index c6ec2ad164..0c3916e8e0 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -160,7 +160,7 @@ class Covariance(BaseCovariance): function operates on. """ - def __init__(self, input_dim, active_dims=None): + def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]]=None): self.input_dim = input_dim if active_dims is None: self.active_dims = np.arange(input_dim) From dc838a61ec117ef8afa655796d4c5a7472f04eb9 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 23 Jan 2023 20:52:29 -0800 Subject: [PATCH 05/36] rename psd to power_spectral_density, rename D to n_dims, address other comments --- pymc/gp/cov.py | 36 ++++++++++++++++----------------- pymc/gp/hsgp.py | 53 +++++++++++++++++++++---------------------------- 2 files changed, 41 insertions(+), 48 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index c6ec2ad164..3f7aa19232 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -171,7 +171,7 @@ def __init__(self, input_dim, active_dims=None): raise ValueError("Values in `active_dims` can't be larger than `input_dim`.") @property - def D(self): + def n_dims(self): """The dimensionality of the input, as taken from the `active_dims`. """ @@ -288,7 +288,7 @@ def _merge_factors_psd(self, omega): # If it's a covariance try to calculate the psd try: - factor_list.append(factor.psd(omega)) + factor_list.append(factor.power_spectral_density(omega)) except (AttributeError, NotImplementedError) as e: @@ -299,7 +299,7 @@ def _merge_factors_psd(self, omega): else: raise ValueError( - "Power spectral densities, `.psd(omega)`, can only be calculated for " + "Power spectral densities, `.power_spectral_density(omega)`, can only be calculated for " f"`Stationary` covariance functions. {factor} is non-stationary." ) from e @@ -314,7 +314,7 @@ class Add(Combination): def __call__(self, X, Xs=None, diag=False): return reduce(add, self._merge_factors_cov(X, Xs, diag)) - def psd(self, omega): + def power_spectral_density(self, omega): return reduce(add, self._merge_factors_psd(omega)) @@ -322,7 +322,7 @@ class Prod(Combination): def __call__(self, X, Xs=None, diag=False): return reduce(mul, self._merge_factors_cov(X, Xs, diag)) - def psd(self, omega): + def power_spectral_density(self, omega): check = Counter([isinstance(factor, Covariance) for factor in self._factor_list]) if check.get(True) >= 2: raise NotImplementedError( @@ -526,7 +526,7 @@ def diag(self, X): def full(self, X, Xs=None): raise NotImplementedError - def psd(self, omega): + def power_spectral_density(self, omega): raise NotImplementedError @@ -573,7 +573,7 @@ class ExpQuad(Stationary): k(x, x') = \mathrm{exp}\left[ -\frac{(x - x')^2}{2 \ell^2} \right] - The power spectral density (`psd`) is: + The power spectral density (`power_spectral_density`) is: .. 
math:: @@ -586,9 +586,9 @@ def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) return at.exp(-0.5 * self.square_dist(X, Xs)) - def psd(self, omega): - ls = at.ones(self.D) * self.ls - c = at.power(at.sqrt(2.0 * np.pi), self.D) + def power_spectral_density(self, omega): + ls = at.ones(self.n_dims) * self.ls + c = at.power(at.sqrt(2.0 * np.pi), self.n_dims) exp = at.exp(-0.5 * at.dot(at.square(omega), at.square(ls))) return c * at.prod(ls) * exp @@ -640,10 +640,10 @@ def full(self, X, Xs=None): r = self.euclidean_dist(X, Xs) return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * at.square(r)) * at.exp(-1.0 * np.sqrt(5.0) * r) - def psd(self, omega): - ls = at.ones(self.D) * self.ls - D52 = (self.D + 5) / 2 - num = at.power(2, self.D) * at.power(np.pi, self.D / 2) * at.gamma(D52) * at.power(5, 5 / 2) + def power_spectral_density(self, omega): + ls = at.ones(self.n_dims) * self.ls + D52 = (self.n_dims + 5) / 2 + num = at.power(2, self.n_dims) * at.power(np.pi, self.n_dims / 2) * at.gamma(D52) * at.power(5, 5 / 2) den = 0.75 * at.sqrt(np.pi) pow = at.power(5.0 + at.dot(at.square(omega), at.square(ls)), -1 * D52) return (num / den) * at.prod(ls) * pow @@ -674,10 +674,10 @@ def full(self, X, Xs=None): r = self.euclidean_dist(X, Xs) return (1.0 + np.sqrt(3.0) * r) * at.exp(-np.sqrt(3.0) * r) - def psd(self, omega): - ls = at.ones(self.D) * self.ls - D32 = (self.D + 3) / 2 - num = at.power(2, self.D) * at.power(np.pi, self.D / 2) * at.gamma(D32) * at.power(3, 3 / 2) + def power_spectral_density(self, omega): + ls = at.ones(self.n_dims) * self.ls + D32 = (self.n_dims + 3) / 2 + num = at.power(2, self.n_dims) * at.power(np.pi, self.n_dims / 2) * at.gamma(D32) * at.power(3, 3 / 2) den = 0.5 * at.sqrt(np.pi) pow = at.power(3.0 + at.dot(at.square(omega), at.square(ls)), -1 * D32) return (num / den) * at.prod(ls) * pow diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index dd077b646c..df89d2341d 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -106,7 +106,7 @@ def __init__( m: Sequence[int], L: Optional[Sequence[float]] = None, c: float = 1.5, - drop_first=False, + drop_first: bool = False, *, mean_func: Mean = Zero(), cov_func: Covariance, @@ -117,17 +117,17 @@ def __init__( ) if not isinstance(m, Sequence): raise ValueError(arg_err_msg) - if len(m) != cov_func.D: + if len(m) != cov_func.n_dims: raise ValueError(arg_err_msg) m = tuple(m) - if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.D): + if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.n_dims): raise ValueError(arg_err_msg) elif L is not None: L = tuple(L) if L is None and c < 1.2: warnings.warn( - "Most applications will require a `c >= 1.2` for accuracy at the boundaries of the " + "Most applications will require `c >= 1.2` for accuracy at the boundaries of the " "domain." 
) @@ -135,11 +135,11 @@ def __init__( self.m = m self.L = L self.c = c - self.D = cov_func.D + self.n_dims = cov_func.n_dims super().__init__(mean_func=mean_func, cov_func=cov_func) - def __add__(self, other): + def __add__(self, other: HSGP): raise NotImplementedError("Additive HSGPs aren't supported ") def _set_boundary(self, X): @@ -153,14 +153,14 @@ def _set_boundary(self, X): self.L = at.as_tensor_variable(self.L) @staticmethod - def _eigendecomposition(X, L, m, D): + def _eigendecomposition(X, L, m, n_dims): """Construct the eigenvalues and eigenfunctions of the Laplace operator.""" m_star = at.prod(m) - S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(D)]) + S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(n_dims)]) S = np.vstack([s.flatten() for s in S]).T eigvals = at.square((np.pi * S) / (2 * L)) phi = at.ones((X.shape[0], m_star)) - for d in range(D): + for d in range(n_dims): c = 1.0 / np.sqrt(L[d]) phi *= c * at.sin(at.sqrt(eigvals[:, d]) * (at.tile(X[:, d][:, None], m_star) + L[d])) omega = at.sqrt(eigvals) @@ -172,8 +172,8 @@ def approx_K(self, X): """ X, _ = self.cov_func._slice(X) self._set_boundary(X) - omega, phi, _ = self._eigendecomposition(X, self.L, self.m, self.cov_func.D) - psd = self.cov_func.psd(omega) + omega, phi, _ = self._eigendecomposition(X, self.L, self.m, self.cov_func.n_dims) + psd = self.cov_func.power_spectral_density(omega) return at.dot(phi * psd, at.transpose(phi)) def prior(self, name, X, dims=None): @@ -192,23 +192,16 @@ def prior(self, name, X, dims=None): X, _ = self.cov_func._slice(X) self._set_boundary(X) - omega, phi, m_star = self._eigendecomposition(X, self.L, self.m, self.D) - psd = self.cov_func.psd(omega) - - if self.drop_first: - self.beta = pm.Normal(f"{name}_coeffs_", size=m_star - 1) - self.f = pm.Deterministic( - name, - self.mean_func(X) + at.squeeze(at.dot(phi[:, 1:], self.beta * psd[1:])), - dims=dims, - ) - else: - self.beta = pm.Normal(f"{name}_coeffs_", size=m_star) - self.f = pm.Deterministic( - name, - self.mean_func(X) + at.squeeze(at.dot(phi, self.beta * at.sqrt(psd))), - dims=dims, - ) + omega, phi, m_star = self._eigendecomposition(X, self.L, self.m, self.n_dims) + psd = self.cov_func.power_spectral_density(omega) + + i = int(self.drop_first==True) + self.beta = pm.Normal(f"{name}_coeffs_", size=m_star - i) + self.f = pm.Deterministic( + name, + self.mean_func(X) + at.squeeze(at.dot(phi[:, i:], self.beta * psd[i:])), + dims=dims, + ) return self.f def _build_conditional(self, Xnew): @@ -217,8 +210,8 @@ def _build_conditional(self, Xnew): "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." 
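                # A sketch of the intended call order (names from this class):
                #     f = gp.prior("f", X)                  # defines self.beta
                #     fcond = gp.conditional("fcond", Xnew)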
) Xnew, _ = self.cov_func._slice(Xnew) - omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.D) - psd = self.cov_func.psd(omega) + omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.n_dims) + psd = self.cov_func.power_spectral_density(omega) return self.mean_func(Xnew) + at.squeeze(at.dot(phi, self.beta * psd)) def conditional(self, name, Xnew, dims=None): From bda5831171f1088953292b2a4b539fa1505839f6 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Wed, 25 Jan 2023 14:28:59 -0800 Subject: [PATCH 06/36] add type hints, refactor to allow user to bypass GP api --- pymc/gp/cov.py | 80 ++++++++++++++++++++++++----------------- pymc/gp/hsgp.py | 95 +++++++++++++++++++++++++++---------------------- 2 files changed, 101 insertions(+), 74 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index e454496479..558419fc8e 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -18,6 +18,7 @@ from functools import reduce from numbers import Number from operator import add, mul +from typing import Optional, Sequence import numpy as np import pytensor @@ -160,7 +161,7 @@ class Covariance(BaseCovariance): function operates on. """ - def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]]=None): + def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None): self.input_dim = input_dim if active_dims is None: self.active_dims = np.arange(input_dim) @@ -299,8 +300,9 @@ def _merge_factors_psd(self, omega): else: raise ValueError( - "Power spectral densities, `.power_spectral_density(omega)`, can only be calculated for " - f"`Stationary` covariance functions. {factor} is non-stationary." + "Power spectral densities, `.power_spectral_density(omega)`, can only " + f"be calculated for `Stationary` covariance functions. {factor} is " + "non-stationary." ) from e else: @@ -573,13 +575,6 @@ class ExpQuad(Stationary): k(x, x') = \mathrm{exp}\left[ -\frac{(x - x')^2}{2 \ell^2} \right] - The power spectral density (`power_spectral_density`) is: - - .. math:: - - S(\boldsymbol\omega) = - (\sqrt(2 \pi)^D \prod_{i}^{D}\ell_i - \exp\left( -\frac{1}{2} \sum_{i}^{D}\ell_i^2 \omega_i^{2} \right) """ def full(self, X, Xs=None): @@ -587,6 +582,15 @@ def full(self, X, Xs=None): return at.exp(-0.5 * self.square_dist(X, Xs)) def power_spectral_density(self, omega): + r""" + The power spectral density for the ExpQuad kernel is: + + .. math:: + + S(\boldsymbol\omega) = + (\sqrt(2 \pi)^D \prod_{i}^{D}\ell_i + \exp\left( -\frac{1}{2} \sum_{i}^{D}\ell_i^2 \omega_i^{2} \right) + """ ls = at.ones(self.n_dims) * self.ls c = at.power(at.sqrt(2.0 * np.pi), self.n_dims) exp = at.exp(-0.5 * at.dot(at.square(omega), at.square(ls))) @@ -623,16 +627,6 @@ class Matern52(Stationary): k(x, x') = \left(1 + \frac{\sqrt{5(x - x')^2}}{\ell} + \frac{5(x-x')^2}{3\ell^2}\right) \mathrm{exp}\left[ - \frac{\sqrt{5(x - x')^2}}{\ell} \right] - - The power spectral density is: - - .. math:: - - S(\boldsymbol\omega) = - \frac{2^D \pi^{\frac{D}{2}} \Gamma(\frac{D+5}{2}) 5^{5/2}} - {\frac{3}{4}\sqrt{\pi}} - \prod_{i=1}^{D}\ell_{i} - \left(5 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+5}{2}} """ def full(self, X, Xs=None): @@ -641,9 +635,25 @@ def full(self, X, Xs=None): return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * at.square(r)) * at.exp(-1.0 * np.sqrt(5.0) * r) def power_spectral_density(self, omega): + r""" + The power spectral density for the Matern52 kernel is: + + .. 
math:: + + S(\boldsymbol\omega) = + \frac{2^D \pi^{\frac{D}{2}} \Gamma(\frac{D+5}{2}) 5^{5/2}} + {\frac{3}{4}\sqrt{\pi}} + \prod_{i=1}^{D}\ell_{i} + \left(5 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+5}{2}} + """ ls = at.ones(self.n_dims) * self.ls D52 = (self.n_dims + 5) / 2 - num = at.power(2, self.n_dims) * at.power(np.pi, self.n_dims / 2) * at.gamma(D52) * at.power(5, 5 / 2) + num = ( + at.power(2, self.n_dims) + * at.power(np.pi, self.n_dims / 2) + * at.gamma(D52) + * at.power(5, 5 / 2) + ) den = 0.75 * at.sqrt(np.pi) pow = at.power(5.0 + at.dot(at.square(omega), at.square(ls)), -1 * D52) return (num / den) * at.prod(ls) * pow @@ -657,16 +667,6 @@ class Matern32(Stationary): k(x, x') = \left(1 + \frac{\sqrt{3(x - x')^2}}{\ell}\right) \mathrm{exp}\left[ - \frac{\sqrt{3(x - x')^2}}{\ell} \right] - - The power spectral density is: - - .. math:: - - S(\boldsymbol\omega) = - \frac{2^D \pi^{D/2} \Gamma\left(\frac{D+3}{2}\right) 3^{3/2}} - {\frac{1}{2}\sqrt{\pi}} - \prod_{i=1}^{D}\ell_{i} - \left(3 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+3}{2}} """ def full(self, X, Xs=None): @@ -675,9 +675,25 @@ def full(self, X, Xs=None): return (1.0 + np.sqrt(3.0) * r) * at.exp(-np.sqrt(3.0) * r) def power_spectral_density(self, omega): + r""" + The power spectral density for the Matern32 kernel is: + + .. math:: + + S(\boldsymbol\omega) = + \frac{2^D \pi^{D/2} \Gamma\left(\frac{D+3}{2}\right) 3^{3/2}} + {\frac{1}{2}\sqrt{\pi}} + \prod_{i=1}^{D}\ell_{i} + \left(3 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+3}{2}} + """ ls = at.ones(self.n_dims) * self.ls D32 = (self.n_dims + 3) / 2 - num = at.power(2, self.n_dims) * at.power(np.pi, self.n_dims / 2) * at.gamma(D32) * at.power(3, 3 / 2) + num = ( + at.power(2, self.n_dims) + * at.power(np.pi, self.n_dims / 2) + * at.gamma(D32) + * at.power(3, 3 / 2) + ) den = 0.5 * at.sqrt(np.pi) pow = at.power(3.0 + at.dot(at.square(omega), at.square(ls)), -1 * D32) return (num / den) * at.prod(ls) * pow diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index df89d2341d..3c2f38e13d 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -1,5 +1,3 @@ -# Copyright 2022 The PyMC Developers -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,13 +12,14 @@ import warnings -from typing import Optional, Sequence +from typing import Optional, Sequence, Union import numpy as np -import pytensor.tensor as at +import pytensor.tensor as pt import pymc as pm +from pymc.distributions.shape_utils import Dims from pymc.gp.cov import Covariance from pymc.gp.gp import Base from pymc.gp.mean import Mean, Zero @@ -105,21 +104,24 @@ def __init__( self, m: Sequence[int], L: Optional[Sequence[float]] = None, - c: float = 1.5, + c: Optional[float] = 1.5, drop_first: bool = False, *, mean_func: Mean = Zero(), cov_func: Covariance, ): arg_err_msg = ( - "`m` and L, if provided, must be sequences, with one element per active " + "`m` and L, if provided, must be sequences with one element per active " "dimension of the kernel or covariance function." 
) + if not isinstance(m, Sequence): raise ValueError(arg_err_msg) + if len(m) != cov_func.n_dims: raise ValueError(arg_err_msg) m = tuple(m) + if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.n_dims): raise ValueError(arg_err_msg) elif L is not None: @@ -132,51 +134,57 @@ def __init__( ) self.drop_first = drop_first - self.m = m self.L = L + self.m = m self.c = c self.n_dims = cov_func.n_dims super().__init__(mean_func=mean_func, cov_func=cov_func) - def __add__(self, other: HSGP): + def __add__(self, other): raise NotImplementedError("Additive HSGPs aren't supported ") def _set_boundary(self, X): """Make L from X and c if L is not passed in.""" if self.L is None: # Define new L based on c and X range - La = at.abs(at.min(X, axis=0)) - Lb = at.abs(at.max(X, axis=0)) - self.L = self.c * at.max(at.stack((La, Lb)), axis=0) + La = pt.abs(pt.min(X, axis=0)) + Lb = pt.abs(pt.max(X, axis=0)) + self.L = self.c * pt.max(pt.stack((La, Lb)), axis=0) else: - self.L = at.as_tensor_variable(self.L) + self.L = pt.as_tensor_variable(self.L) + + def prior_components(self, X: Union[np.ndarray, pt.TensorVariable]): + """Return the basis and coefficient priors, whose product is the HSGP. It can be useful + to bypass the GP API and work with the basis and coefficients directly for models with i.e. + multiple GPs. + """ + X, _ = self.cov_func._slice(X) + self._set_boundary(X) + + omega, phi, m_star = self._eigendecomposition(X, self.L, self.m, self.n_dims) + psd = self.cov_func.power_spectral_density(omega) + + i = int(self.drop_first == True) + return phi[:, i:], psd[i:], m_star - i @staticmethod def _eigendecomposition(X, L, m, n_dims): """Construct the eigenvalues and eigenfunctions of the Laplace operator.""" - m_star = at.prod(m) + m_star = pt.prod(m) S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(n_dims)]) S = np.vstack([s.flatten() for s in S]).T - eigvals = at.square((np.pi * S) / (2 * L)) - phi = at.ones((X.shape[0], m_star)) + eigvals = pt.square((np.pi * S) / (2 * L)) + phi = pt.ones((X.shape[0], m_star)) for d in range(n_dims): c = 1.0 / np.sqrt(L[d]) - phi *= c * at.sin(at.sqrt(eigvals[:, d]) * (at.tile(X[:, d][:, None], m_star) + L[d])) - omega = at.sqrt(eigvals) + phi *= c * pt.sin(pt.sqrt(eigvals[:, d]) * (pt.tile(X[:, d][:, None], m_star) + L[d])) + omega = pt.sqrt(eigvals) return omega, phi, m_star - def approx_K(self, X): - """A helper function which gives the approximate kernel or covariance matrix K. This can be - helpful when trying to see how well an approximation may work. - """ - X, _ = self.cov_func._slice(X) - self._set_boundary(X) - omega, phi, _ = self._eigendecomposition(X, self.L, self.m, self.cov_func.n_dims) - psd = self.cov_func.power_spectral_density(omega) - return at.dot(phi * psd, at.transpose(phi)) - - def prior(self, name, X, dims=None): + def prior( + self, name: str, X: Union[np.ndarray, pt.TensorVariable], dims: Optional[Dims] = None + ): R""" Returns the (approximate) GP prior distribution evaluated over the input locations `X`. @@ -190,31 +198,34 @@ def prior(self, name, X, dims=None): Dimension name for the GP random variable. 
""" - X, _ = self.cov_func._slice(X) - self._set_boundary(X) - omega, phi, m_star = self._eigendecomposition(X, self.L, self.m, self.n_dims) - psd = self.cov_func.power_spectral_density(omega) + phi, psd, m_star = self.prior_components(X) + self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=m_star) - i = int(self.drop_first==True) - self.beta = pm.Normal(f"{name}_coeffs_", size=m_star - i) self.f = pm.Deterministic( name, - self.mean_func(X) + at.squeeze(at.dot(phi[:, i:], self.beta * psd[i:])), + self.mean_func(X) + pt.squeeze(pt.dot(phi, self.beta * psd)), dims=dims, ) return self.f - def _build_conditional(self, Xnew): - if getattr(self, "beta", None) is None: - raise ValueError( - "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." - ) + def _build_conditional(self, Xnew, beta=None): + if beta is None: + try: + beta = self.beta + except AttributeError: + raise ValueError( + "Prior is not set, can't create a conditional. Either pass in the HSGP beta " + "coefficients or call `.prior(name, X)` first." + ) + Xnew, _ = self.cov_func._slice(Xnew) omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.n_dims) psd = self.cov_func.power_spectral_density(omega) - return self.mean_func(Xnew) + at.squeeze(at.dot(phi, self.beta * psd)) + return self.mean_func(Xnew) + pt.squeeze(pt.dot(phi, beta * psd)) - def conditional(self, name, Xnew, dims=None): + def conditional( + self, name: str, Xnew: Union[np.ndarray, pt.TensorVariable], dims: Optional[Dims] = None + ): R""" Returns the (approximate) conditional distribution evaluated over new input locations `Xnew`. From a8d0fdc330ffd91746f3bb54338215abb4a63b16 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Wed, 25 Jan 2023 23:43:15 -0800 Subject: [PATCH 07/36] fix tests up --- pymc/gp/hsgp.py | 8 ++++++-- pymc/tests/gp/test_gp.py | 15 ++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index 3c2f38e13d..fcba3c2f32 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -224,7 +224,11 @@ def _build_conditional(self, Xnew, beta=None): return self.mean_func(Xnew) + pt.squeeze(pt.dot(phi, beta * psd)) def conditional( - self, name: str, Xnew: Union[np.ndarray, pt.TensorVariable], dims: Optional[Dims] = None + self, + name: str, + Xnew: Union[np.ndarray, pt.TensorVariable], + dims: Optional[Dims] = None, + beta: Optional[pt.TensorVariable] = None, ): R""" Returns the (approximate) conditional distribution evaluated over new input locations @@ -239,5 +243,5 @@ def conditional( dims: None Dimension name for the GP random variable. 
""" - fnew = self._build_conditional(Xnew) + fnew = self._build_conditional(Xnew, beta=beta) return pm.Deterministic(name, fnew, dims=dims) diff --git a/pymc/tests/gp/test_gp.py b/pymc/tests/gp/test_gp.py index 1fe22a6971..ddfe4f2e3d 100644 --- a/pymc/tests/gp/test_gp.py +++ b/pymc/tests/gp/test_gp.py @@ -568,13 +568,6 @@ def gp(self, cov_func): gp = pm.gp.HSGP(m=[500], c=4.0, cov_func=cov_func) return gp - def test_shapes(self, model, gp, X): - with model: - ka = gp.approx_K(X) - kf = gp.cov_func(X) - # NOTE: relative difference is still high - np.testing.assert_allclose(ka.eval(), kf.eval(), atol=1e-10) - def test_prior(self, model, gp, X): # TODO: improve mathematical side of tests # So far I just check interfaces are the same for latent and HSGP @@ -595,19 +588,19 @@ def test_conditional(self, model, gp, X, X1): def test_parametrization_m(self): cov_func = pm.gp.cov.ExpQuad(1, ls=0.1) with pytest.raises( - ValueError, match="must be sequences, with one element per active dimension" + ValueError, match="must be sequences with one element per active dimension" ): pm.gp.HSGP(m=500, c=4.0, cov_func=cov_func) with pytest.raises( - ValueError, match="must be sequences, with one element per active dimension" + ValueError, match="must be sequences with one element per active dimension" ): pm.gp.HSGP(m=[500, 12], c=4.0, cov_func=cov_func) def test_parametrization_L(self): cov_func = pm.gp.cov.ExpQuad(1, ls=0.1) with pytest.raises( - ValueError, match="must be sequences, with one element per active dimension" + ValueError, match="must be sequences with one element per active dimension" ): pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func) @@ -616,4 +609,4 @@ def test_parametrization_drop_first(self, model, cov_func, X, drop_first): with model: gp = pm.gp.HSGP(m=[500], c=4.0, cov_func=cov_func, drop_first=drop_first) gp.prior("f1", X) - assert model.f1_coeffs_.type.shape == (500 - drop_first,) + assert model.f1_hsgp_coeffs_.type.shape == (500 - drop_first,) From 8b88a5925da20471179cdac810bd20d0bddb4e43 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Thu, 26 Jan 2023 09:32:05 -0800 Subject: [PATCH 08/36] fix cov psd tests --- pymc/tests/gp/test_cov.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pymc/tests/gp/test_cov.py b/pymc/tests/gp/test_cov.py index f16580288d..aba96067c8 100644 --- a/pymc/tests/gp/test_cov.py +++ b/pymc/tests/gp/test_cov.py @@ -189,9 +189,9 @@ def test_covpsd_add(self): cov1 = 2 * pm.gp.cov.ExpQuad(1, 0.1) cov2 = 5 * pm.gp.cov.ExpQuad(1, 1.0) cov = cov1 + cov2 - psd1 = cov1.psd(omega[:, None]).eval() - psd2 = cov2.psd(omega[:, None]).eval() - psd = cov.psd(omega[:, None]).eval() + psd1 = cov1.power_spectral_density(omega[:, None]).eval() + psd2 = cov2.power_spectral_density(omega[:, None]).eval() + psd = cov.power_spectral_density(omega[:, None]).eval() npt.assert_allclose(psd, psd1 + psd2) def test_copsd_multiply(self): @@ -203,7 +203,7 @@ def test_copsd_multiply(self): cov2 = pm.gp.cov.ExpQuad(1, ls=1) with pytest.raises(NotImplementedError): - psd = (cov1 * cov2).psd(omega[:, None]).eval() + psd = (cov1 * cov2).power_spectral_density(omega[:, None]).eval() def test_covpsd_nonstationary1(self): L = 10.0 @@ -212,7 +212,7 @@ def test_covpsd_nonstationary1(self): cov = 2 * pm.gp.cov.Linear(1, c=5) with pytest.raises(ValueError): - psd = cov.psd(omega[:, None]).eval() + psd = cov.power_spectral_density(omega[:, None]).eval() def test_covpsd_nonstationary2(self): L = 10.0 @@ -221,7 +221,7 @@ def 
test_covpsd_nonstationary2(self): cov = 2 * pm.gp.cov.ExpQuad(1, ls=1) + 10.0 with pytest.raises(ValueError): - psd = cov.psd(omega[:, None]).eval() + psd = cov.power_spectral_density(omega[:, None]).eval() def test_covpsd_notimplemented(self): class NewStationaryCov(pm.gp.cov.Stationary): @@ -233,7 +233,7 @@ class NewStationaryCov(pm.gp.cov.Stationary): cov = 2 * NewStationaryCov(1, ls=1) with pytest.raises(NotImplementedError): - psd = cov.psd(omega[:, None]).eval() + psd = cov.power_spectral_density(omega[:, None]).eval() class TestCovExponentiation: @@ -434,7 +434,9 @@ def test_psd(self): omega = np.linspace(0, 2, 50) ell = 2.0 true_1d_psd = np.sqrt(2 * np.pi * np.square(ell)) * np.exp(-0.5 * np.square(ell * omega)) - test_1d_psd = pm.gp.cov.ExpQuad(1, ls=ell).psd(omega[:, None]).flatten().eval() + test_1d_psd = ( + pm.gp.cov.ExpQuad(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval() + ) npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5) @@ -520,7 +522,9 @@ def test_psd(self): ell = 2.0 lamda = np.sqrt(5) / ell true_1d_psd = (16.0 / 3.0) * np.power(lamda, 5) * np.power(lamda**2 + omega**2, -3) - test_1d_psd = pm.gp.cov.Matern52(1, ls=ell).psd(omega[:, None]).flatten().eval() + test_1d_psd = ( + pm.gp.cov.Matern52(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval() + ) npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5) @@ -544,7 +548,9 @@ def test_psd(self): ell = 2.0 lamda = np.sqrt(3) / ell true_1d_psd = 4 * np.power(lamda, 3) * np.power(lamda**2 + omega**2, -2) - test_1d_psd = pm.gp.cov.Matern32(1, ls=ell).psd(omega[:, None]).flatten().eval() + test_1d_psd = ( + pm.gp.cov.Matern32(1, ls=ell).power_spectral_density(omega[:, None]).flatten().eval() + ) npt.assert_allclose(true_1d_psd, test_1d_psd, atol=1e-5) From cebb6cff1bebb6ba7617c4c1257353f61d7eec27 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Thu, 26 Jan 2023 10:43:57 -0800 Subject: [PATCH 09/36] add docstring, no need to use m_star, can use size to set beta size --- pymc/gp/hsgp.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index fcba3c2f32..a02b0e1f0a 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -157,7 +157,13 @@ def _set_boundary(self, X): def prior_components(self, X: Union[np.ndarray, pt.TensorVariable]): """Return the basis and coefficient priors, whose product is the HSGP. It can be useful to bypass the GP API and work with the basis and coefficients directly for models with i.e. - multiple GPs. + multiple GPs. It returns the basis `phi` and the power spectral density, `psd`. The GP + `f` can be formed by `f = phi @ (pm.Normal("hsgp_coeffs", size=psd.size) * psd)`. + + Parameters + ---------- + X: array-like + Function input values. """ X, _ = self.cov_func._slice(X) self._set_boundary(X) @@ -166,7 +172,7 @@ def prior_components(self, X: Union[np.ndarray, pt.TensorVariable]): psd = self.cov_func.power_spectral_density(omega) i = int(self.drop_first == True) - return phi[:, i:], psd[i:], m_star - i + return phi[:, i:], psd[i:] @staticmethod def _eigendecomposition(X, L, m, n_dims): @@ -198,12 +204,12 @@ def prior( Dimension name for the GP random variable. 
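
        A sketch of the equivalent construction through `prior_components`, for
        models that bypass the GP API (the coefficient variable name below is
        hypothetical):

        .. code:: python

            phi, psd = gp.prior_components(X)
            beta = pm.Normal("f_hsgp_coeffs_", size=psd.size)
            f = gp.mean_func(X) + phi @ (beta * psd)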
""" - phi, psd, m_star = self.prior_components(X) - self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=m_star) + phi, psd = self.prior_components(X) + self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=psd.size) self.f = pm.Deterministic( name, - self.mean_func(X) + pt.squeeze(pt.dot(phi, self.beta * psd)), + self.mean_func(X) + phi @ (self.beta * psd), dims=dims, ) return self.f @@ -221,7 +227,7 @@ def _build_conditional(self, Xnew, beta=None): Xnew, _ = self.cov_func._slice(Xnew) omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.n_dims) psd = self.cov_func.power_spectral_density(omega) - return self.mean_func(Xnew) + pt.squeeze(pt.dot(phi, beta * psd)) + return self.mean_func(Xnew) + phi @ (beta * psd) def conditional( self, From 8a84b1903cfe525003908b795785bb7123c21aff Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Thu, 26 Jan 2023 14:10:50 -0800 Subject: [PATCH 10/36] fix mypy errors, missing drop_one clause in conditional, add conditional_components --- pymc/gp/hsgp.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index a02b0e1f0a..52e557ee8d 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -19,7 +19,6 @@ import pymc as pm -from pymc.distributions.shape_utils import Dims from pymc.gp.cov import Covariance from pymc.gp.gp import Base from pymc.gp.mean import Mean, Zero @@ -37,8 +36,8 @@ class HSGP(Base): `gp.Latent`, it has `prior` and `conditional` methods. It supports a limited subset of additive covariances. - For information on choosing appropriate `m`, `L`, and `c`, refer Ruitort-Mayol et. al. or to the - pymc examples documentation. + For information on choosing appropriate `m`, `L`, and `c`, refer Ruitort-Mayol et. al. or to + the pymc examples documentation. Parameters ---------- @@ -122,6 +121,9 @@ def __init__( raise ValueError(arg_err_msg) m = tuple(m) + if (L is None and c is None) or (L is not None and c is not None): + raise ValueError("Provide one of `c` or `L`.") + if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.n_dims): raise ValueError(arg_err_msg) elif L is not None: @@ -188,9 +190,7 @@ def _eigendecomposition(X, L, m, n_dims): omega = pt.sqrt(eigvals) return omega, phi, m_star - def prior( - self, name: str, X: Union[np.ndarray, pt.TensorVariable], dims: Optional[Dims] = None - ): + def prior(self, name: str, X: Union[np.ndarray, pt.TensorVariable], *args, **kwargs): R""" Returns the (approximate) GP prior distribution evaluated over the input locations `X`. @@ -210,10 +210,18 @@ def prior( self.f = pm.Deterministic( name, self.mean_func(X) + phi @ (self.beta * psd), - dims=dims, + dims=kwargs.get("dims"), ) return self.f + def conditional_components(self, Xnew: Union[np.ndarray, pt.TensorVariable]): + Xnew, _ = self.cov_func._slice(Xnew) + omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.n_dims) + psd = self.cov_func.power_spectral_density(omega) + + i = int(self.drop_first == True) + return phi[:, i:], psd[i:] + def _build_conditional(self, Xnew, beta=None): if beta is None: try: @@ -224,21 +232,13 @@ def _build_conditional(self, Xnew, beta=None): "coefficients or call `.prior(name, X)` first." 
)
 
-        Xnew, _ = self.cov_func._slice(Xnew)
-        omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.n_dims)
-        psd = self.cov_func.power_spectral_density(omega)
+        phi, psd = self.conditional_components(Xnew)
         return self.mean_func(Xnew) + phi @ (beta * psd)
 
-    def conditional(
-        self,
-        name: str,
-        Xnew: Union[np.ndarray, pt.TensorVariable],
-        dims: Optional[Dims] = None,
-        beta: Optional[pt.TensorVariable] = None,
-    ):
+    def conditional(self, name: str, Xnew: Union[np.ndarray, pt.TensorVariable], *args, **kwargs):
         R"""
         Returns the (approximate) conditional distribution evaluated over new input locations
         `Xnew`.
 
         Parameters
         ----------
@@ -246,8 +246,8 @@ def conditional(
             Name of the random variable
         Xnew: array-like
             Function input values.
-        dims: None
-            Dimension name for the GP random variable.
+        kwargs: dict-like
+            Optional arguments such as `dims`, or the coefficients `beta`.
         """
-        fnew = self._build_conditional(Xnew, beta=beta)
-        return pm.Deterministic(name, fnew, dims=dims)
+        fnew = self._build_conditional(Xnew, beta=kwargs.get("beta"))
+        return pm.Deterministic(name, fnew, dims=kwargs.get("dims"))

From cfbfb17507873b098532a90ba8e2c5d1df65ae8d Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Thu, 26 Jan 2023 21:03:52 -0800
Subject: [PATCH 11/36] add parameterization arg

---
 pymc/gp/hsgp.py | 62 +++++++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 25 deletions(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index 52e557ee8d..cb7ed246f8 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -103,8 +103,9 @@ def __init__(
         self,
         m: Sequence[int],
         L: Optional[Sequence[float]] = None,
-        c: Optional[float] = 1.5,
+        c: Optional[float] = None,
         drop_first: bool = False,
+        parameterization="noncentered",
         *,
         mean_func: Mean = Zero(),
         cov_func: Covariance,
@@ -122,7 +123,7 @@ def __init__(
         m = tuple(m)
 
         if (L is None and c is None) or (L is not None and c is not None):
-            raise ValueError("Provide one of `c` or `L`.")
+            raise ValueError("Provide one of `c` or `L`")
 
         if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.n_dims):
             raise ValueError(arg_err_msg)
@@ -135,6 +136,12 @@ def __init__(
                 "domain."
             )
 
+        parameterization = parameterization.lower().replace("-", "")
+        if parameterization not in ["centered", "noncentered"]:
+            raise ValueError("`parameterization` must be either 'centered' or 'noncentered'.")
+        else:
+            self.parameterization = parameterization
+
         self.drop_first = drop_first
         self.L = L
         self.m = m
@@ -204,14 +211,17 @@ def prior(self, name: str, X: Union[np.ndarray, pt.TensorVariable], *args, **kwa
             Dimension name for the GP random variable. 
""" - phi, psd = self.prior_components(X) - self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=psd.size) + phi, sqrt_psd = self.prior_components(X) - self.f = pm.Deterministic( - name, - self.mean_func(X) + phi @ (self.beta * psd), - dims=kwargs.get("dims"), - ) + if self.parameterization == "noncentered": + self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=sqrt_psd.size) + f = self.mean_func(X) + phi @ (self.beta * sqrt_psd) + + elif self.parameterization == "centered": + self.beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd, size=sqrt_psd.size) + f = self.mean_func(X) + phi @ self.beta + + self.f = pm.Deterministic(name, f, dims=kwargs.get("dims")) return self.f def conditional_components(self, Xnew: Union[np.ndarray, pt.TensorVariable]): @@ -220,20 +230,22 @@ def conditional_components(self, Xnew: Union[np.ndarray, pt.TensorVariable]): psd = self.cov_func.power_spectral_density(omega) i = int(self.drop_first == True) - return phi[:, i:], psd[i:] - - def _build_conditional(self, Xnew, beta=None): - if beta is None: - try: - beta = self.beta - except AttributeError: - raise ValueError( - "Prior is not set, can't create a conditional. Either pass in the HSGP beta " - "coefficients or call `.prior(name, X)` first." - ) + return phi[:, i:], pt.sqrt(psd[i:]) + + def _build_conditional(self, Xnew): + try: + beta = self.beta + except AttributeError: + raise ValueError( + "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." + ) - phi, psd = self.conditional_components(Xnew) - return self.mean_func(Xnew) + phi @ (beta * psd) + if self.parameterization == "noncentered": + phi, sqrt_psd = self.conditional_components(Xnew) + return self.mean_func(Xnew) + phi @ (beta * sqrt_psd) + elif self.parameterization == "centered": + phi, _ = self.conditional_components(Xnew) + return self.mean_func(Xnew) + phi @ beta def conditional(self, name: str, Xnew: Union[np.ndarray, pt.TensorVariable], *args, **kwargs): R""" @@ -247,7 +259,7 @@ def conditional(self, name: str, Xnew: Union[np.ndarray, pt.TensorVariable], *ar Xnew: array-like Function input values. kwargs: dict-like - Optional arguments such as `dims`, or the coefficients `beta`. + Optional arguments such as `dims`. """ - fnew = self._build_conditional(Xnew, beta=kwargs.get("beta")) + fnew = self._build_conditional(Xnew) return pm.Deterministic(name, fnew, dims=kwargs.get("dims")) From 0ad2c0680af7b771729295bbf7b8682f8c08ec35 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Fri, 27 Jan 2023 11:07:26 -0800 Subject: [PATCH 12/36] make mypy happy --- pymc/gp/hsgp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index cb7ed246f8..2d39ea8a2c 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -130,7 +130,7 @@ def __init__( elif L is not None: L = tuple(L) - if L is None and c < 1.2: + if L is None and c is not None and c < 1.2: warnings.warn( "Most applications will require `c >= 1.2` for accuracy at the boundaries of the " "domain." 
From 1474708b028c213104684b9d5f0906477be60c96 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 30 Jan 2023 15:08:11 -0800 Subject: [PATCH 13/36] dont need conditional_components, use pm.set_data --- pymc/gp/hsgp.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index 2d39ea8a2c..8ccdd85db9 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -147,6 +147,7 @@ def __init__( self.m = m self.c = c self.n_dims = cov_func.n_dims + self._boundary_set = False super().__init__(mean_func=mean_func, cov_func=cov_func) @@ -155,13 +156,16 @@ def __add__(self, other): def _set_boundary(self, X): """Make L from X and c if L is not passed in.""" - if self.L is None: - # Define new L based on c and X range - La = pt.abs(pt.min(X, axis=0)) - Lb = pt.abs(pt.max(X, axis=0)) - self.L = self.c * pt.max(pt.stack((La, Lb)), axis=0) - else: - self.L = pt.as_tensor_variable(self.L) + if not self._boundary_set: + if self.L is None: + # Define new L based on c and X range + La = pt.abs(pt.min(X, axis=0)) + Lb = pt.abs(pt.max(X, axis=0)) + self.L = self.c * pt.max(pt.stack((La, Lb)), axis=0) + else: + self.L = pt.as_tensor_variable(self.L) + + self._boundary_set = True def prior_components(self, X: Union[np.ndarray, pt.TensorVariable]): """Return the basis and coefficient priors, whose product is the HSGP. It can be useful @@ -224,14 +228,6 @@ def prior(self, name: str, X: Union[np.ndarray, pt.TensorVariable], *args, **kwa self.f = pm.Deterministic(name, f, dims=kwargs.get("dims")) return self.f - def conditional_components(self, Xnew: Union[np.ndarray, pt.TensorVariable]): - Xnew, _ = self.cov_func._slice(Xnew) - omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.n_dims) - psd = self.cov_func.power_spectral_density(omega) - - i = int(self.drop_first == True) - return phi[:, i:], pt.sqrt(psd[i:]) - def _build_conditional(self, Xnew): try: beta = self.beta @@ -240,12 +236,16 @@ def _build_conditional(self, Xnew): "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." ) + Xnew, _ = self.cov_func._slice(Xnew) + omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.n_dims) + i = int(self.drop_first == True) + if self.parameterization == "noncentered": - phi, sqrt_psd = self.conditional_components(Xnew) - return self.mean_func(Xnew) + phi @ (beta * sqrt_psd) + psd = self.cov_func.power_spectral_density(omega) + return self.mean_func(Xnew) + phi[:, i:] @ (beta * pt.sqrt(psd[i:])) + elif self.parameterization == "centered": - phi, _ = self.conditional_components(Xnew) - return self.mean_func(Xnew) + phi @ beta + return self.mean_func(Xnew) + phi[:, i:] @ beta def conditional(self, name: str, Xnew: Union[np.ndarray, pt.TensorVariable], *args, **kwargs): R""" From ab339d0234f74b056dd36769a8fd2ccbc5fdd172 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 13 Feb 2023 22:45:30 -0800 Subject: [PATCH 14/36] refactor internals to work with pm.set_data, add more docstrings and example --- pymc/gp/hsgp.py | 158 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 116 insertions(+), 42 deletions(-) diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index 8ccdd85db9..50fc1fb2a9 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -48,7 +48,7 @@ class HSGP(Base): The boundary of the space for each `active_dim`. It is called the boundary condition. Choose L such that the domain `[-L, L]` contains all points in the column of X given by the `active_dim`. 
- c: 1.5 + c: float The proportion extension factor. Used to construct L from X. Defined as `S = max|X|` such that `X` is in `[-S, S]`. `L` is the calculated as `c * S`. One of `c` or `L` must be provided. Further information can be found in Ruitort-Mayol et. al. @@ -66,17 +66,19 @@ class HSGP(Base): .. code:: python # A three dimensional column vector of inputs. - X = np.random.randn(100, 3) + X = np.random.rand(100, 3) with pm.Model() as model: # Specify the covariance function. Three input dimensions, but we only want to use the # last two. cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2]) - # Specify the HSGP. Use 50 basis vectors across each active dimension, [1, 2] for a - # total of 50 * 50 = 2500. The range of the data is inferred from X, and the boundary - # condition multiplier `c` uses 4 * half range of the data, `L`. - gp = pm.gp.HSGP(m=[50, 50], c=4.0, cov_func=cov_func) + # Specify the HSGP. Use 25 basis vectors across each active dimension, [1, 2] for a + # total of 25 * 25 = 625. The value `c = 4` means the boundary of the approximation + # lies at four times the half width of the data. In this example the data lie between + # zero and one, so the boundaries occur at -1.5 and 2.5. The data, both for training + # and prediction should reside well within that boundary.. + gp = pm.gp.HSGP(m=[25, 25], c=4.0, cov_func=cov_func) # Place a GP prior over the function f. f = gp.prior("f", X=X) @@ -147,60 +149,132 @@ def __init__( self.m = m self.c = c self.n_dims = cov_func.n_dims + self.m_star = int(np.prod(self.m)) self._boundary_set = False + # Record whether or not L instead of c was passed on construction. + self.L_given = self.L is not None + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise NotImplementedError("Additive HSGPs aren't supported ") - def _set_boundary(self, X): + def _set_boundary(self, Xs): """Make L from X and c if L is not passed in.""" if not self._boundary_set: + + self.S = pt.max(pt.abs(Xs), axis=0) + if self.L is None: - # Define new L based on c and X range - La = pt.abs(pt.min(X, axis=0)) - Lb = pt.abs(pt.max(X, axis=0)) - self.L = self.c * pt.max(pt.stack((La, Lb)), axis=0) + self.L = self.c * self.S + else: + self.c = self.L / self.S self.L = pt.as_tensor_variable(self.L) self._boundary_set = True - def prior_components(self, X: Union[np.ndarray, pt.TensorVariable]): - """Return the basis and coefficient priors, whose product is the HSGP. It can be useful - to bypass the GP API and work with the basis and coefficients directly for models with i.e. - multiple GPs. It returns the basis `phi` and the power spectral density, `psd`. The GP - `f` can be formed by `f = phi @ (pm.Normal("hsgp_coeffs", size=psd.size) * psd)`. + @property + def _eigenvalues(self): + S = np.meshgrid(*[np.arange(1, 1 + self.m[d]) for d in range(self.n_dims)]) + S = np.vstack([s.flatten() for s in S]).T + return pt.square((np.pi * S) / (2 * self.L)) + + def _eigenvectors(self, X): + phi = pt.ones((X.shape[0], self.m_star)) + for d in range(self.n_dims): + c = 1.0 / np.sqrt(self.L[d]) + term1 = pt.sqrt(self._eigenvalues[:, d]) + term2 = pt.tile(X[:, d][:, None], self.m_star) + self.L[d] + phi *= c * pt.sin(term1 * term2) + return phi + + def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]): + """ Returns the linearized version of the HSGP, the Laplace eigenfunctions and the square + root of the power spectral density needed to create the GP. 
This function allows the user
+        to bypass the GP interface and work directly with the basis and coefficients.
+        This format allows the user to create predictions using `pm.set_data` similarly to a
+        linear model. It also enables computational speed ups in multi-GP models since they may
+        share the same basis. The return values are the Laplace eigenfunctions `phi`, and the
+        square root of the power spectral density.
+
+        Correct results when using `prior_linearized` in tandem with `pm.set_data` and
+        `pm.MutableData` require two conditions. First, one must specify `L` instead of `c` when
+        the GP is constructed. If not, a RuntimeError is raised. Second, the `X` needs to be
+        zero centered, so its mean must be subtracted. An example is given below.
 
         Parameters
         ----------
         X: array-like
             Function input values.
 
+        Examples
+        --------
+        .. code:: python
+
+            # A one dimensional column vector of inputs.
+            X = np.linspace(0, 10, 100)[:, None]
+
+            with pm.Model() as model:
+                eta = pm.Exponential("eta", lam=1.0)
+                ell = pm.InverseGamma("ell", mu=5.0, sigma=5.0)
+                cov_func = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell)
+
+                # m = [200] means 200 basis vectors for the first dimension
+                # L = [10] means the approximation is valid from Xs = [-10, 10]
+                gp = pm.gp.HSGP(m=[200], L=[10], cov_func=cov_func)
+
+                # Order is important. First calculate the mean, then make X a shared variable,
+                # then subtract the mean. When X is mutated later, the correct mean will be
+                # subtracted.
+                X_mu = np.mean(X, axis=0)
+                X = pm.MutableData("X", X)
+                Xs = X - X_mu
+
+                # Pass the zero-subtracted Xs in to the GP
+                phi, sqrt_psd = gp.prior_linearized(Xs)
+
+                # Specify standard normal prior in the coefficients. The number of which
+                # is given by the number of basis vectors, which is also saved in the GP object
+                # as m_star.
+                beta = pm.Normal("beta", size=gp.m_star)
+
+                # The (non-centered) GP approximation is given by
+                f = pm.Deterministic("f", phi @ (beta * sqrt_psd))
+
+                ...
+
+
+            # Then it works just like a linear regression to predict on new data.
+            # First mutate the data X,
+            x_new = np.linspace(-10, 10, 100)
+            with model:
+                model.set_data("X", x_new[:, None])
+
+            # and then make predictions for the GP using posterior predictive sampling.
+            with model:
+                ppc = pm.sample_posterior_predictive(idata, var_names=["f"])
+        """
+        if not self.L_given:
+            warnings.warn(
+                "Since `c` was given instead of `L` on construction, usage of `model.set_data` "
+                "will result in an incorrect posterior predictive. Ignore this warning if you "
+                "don't plan on creating predictions this way. To remove this warning pass `L` "
+                "instead of `c` to the HSGP constructor."
+ ) + X, _ = self.cov_func._slice(X) + self._set_boundary(X) - omega, phi, m_star = self._eigendecomposition(X, self.L, self.m, self.n_dims) + phi = self._eigenvectors(X) + omega = pt.sqrt(self._eigenvalues) psd = self.cov_func.power_spectral_density(omega) - + i = int(self.drop_first == True) return phi[:, i:], pt.sqrt(psd[i:]) - @staticmethod - def _eigendecomposition(X, L, m, n_dims): - """Construct the eigenvalues and eigenfunctions of the Laplace operator.""" - m_star = pt.prod(m) - S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(n_dims)]) - S = np.vstack([s.flatten() for s in S]).T - eigvals = pt.square((np.pi * S) / (2 * L)) - phi = pt.ones((X.shape[0], m_star)) - for d in range(n_dims): - c = 1.0 / np.sqrt(L[d]) - phi *= c * pt.sin(pt.sqrt(eigvals[:, d]) * (pt.tile(X[:, d][:, None], m_star) + L[d])) - omega = pt.sqrt(eigvals) - return omega, phi, m_star - def prior(self, name: str, X: Union[np.ndarray, pt.TensorVariable], *args, **kwargs): R""" Returns the (approximate) GP prior distribution evaluated over the input locations `X`. @@ -214,15 +288,16 @@ def prior(self, name: str, X: Union[np.ndarray, pt.TensorVariable], *args, **kwa dims: None Dimension name for the GP random variable. """ - - phi, sqrt_psd = self.prior_components(X) + self.X_mu = pt.mean(X, axis=0) + phi, sqrt_psd = self.prior_linearized(X - self.X_mu) if self.parameterization == "noncentered": - self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=sqrt_psd.size) - f = self.mean_func(X) + phi @ (self.beta * sqrt_psd) + self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=self.m_star) + self.sqrt_psd = sqrt_psd + f = self.mean_func(X) + phi @ (self.beta * self.sqrt_psd) elif self.parameterization == "centered": - self.beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd, size=sqrt_psd.size) + self.beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd) f = self.mean_func(X) + phi @ self.beta self.f = pm.Deterministic(name, f, dims=kwargs.get("dims")) @@ -230,19 +305,18 @@ def prior(self, name: str, X: Union[np.ndarray, pt.TensorVariable], *args, **kwa def _build_conditional(self, Xnew): try: - beta = self.beta + beta, X_mu = self.beta, self.X_mu except AttributeError: raise ValueError( "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." ) - + Xnew, _ = self.cov_func._slice(Xnew) - omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.n_dims) + phi = self._eigenvectors(Xnew - X_mu) i = int(self.drop_first == True) if self.parameterization == "noncentered": - psd = self.cov_func.power_spectral_density(omega) - return self.mean_func(Xnew) + phi[:, i:] @ (beta * pt.sqrt(psd[i:])) + return self.mean_func(Xnew) + phi[:, i:] @ (beta * self.sqrt_psd) elif self.parameterization == "centered": return self.mean_func(Xnew) + phi[:, i:] @ beta From 47b0cf75b2b6813fb3fb262d3745698e66b53c02 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 13 Feb 2023 22:47:43 -0800 Subject: [PATCH 15/36] precommit --- pymc/gp/hsgp.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index 50fc1fb2a9..fb0cace7ed 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -37,7 +37,7 @@ class HSGP(Base): additive covariances. For information on choosing appropriate `m`, `L`, and `c`, refer Ruitort-Mayol et. al. or to - the pymc examples documentation. + the pymc examples that use HSGP. Parameters ---------- @@ -77,7 +77,7 @@ class HSGP(Base): # total of 25 * 25 = 625. 
The value `c = 4` means the boundary of the approximation
         # lies at four times the half width of the data. In this example the data lie between
         # zero and one, so the boundaries occur at -1.5 and 2.5. The data, both for training
-        # and prediction should reside well within that boundary..
+        # and prediction should reside well within that boundary..
         gp = pm.gp.HSGP(m=[25, 25], c=4.0, cov_func=cov_func)
 
         # Place a GP prior over the function f.
@@ -154,7 +154,7 @@ def __init__(
 
         # Record whether or not L instead of c was passed on construction.
         self.L_given = self.L is not None
-
+
         super().__init__(mean_func=mean_func, cov_func=cov_func)
 
     def __add__(self, other):
@@ -163,12 +163,12 @@ def __add__(self, other):
     def _set_boundary(self, Xs):
         """Make L from X and c if L is not passed in."""
         if not self._boundary_set:
-
+
             self.S = pt.max(pt.abs(Xs), axis=0)
-
+
             if self.L is None:
                 self.L = self.c * self.S
-
+
             else:
                 self.c = self.L / self.S
                 self.L = pt.as_tensor_variable(self.L)
@@ -191,24 +191,24 @@ def _eigenvectors(self, X):
         return phi
 
     def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]):
-        """ Returns the linearized version of the HSGP, the Laplace eigenfunctions and the square
+        """Returns the linearized version of the HSGP, the Laplace eigenfunctions and the square
         root of the power spectral density needed to create the GP. This function allows the user
-        to bypass the GP interface and work directly with the basis and coefficients.
-        This format allows the user to create predictions using `pm.set_data` similarly to a
+        to bypass the GP interface and work directly with the basis and coefficients.
+        This format allows the user to create predictions using `pm.set_data` similarly to a
         linear model. It also enables computational speed ups in multi-GP models since they may
-        share the same basis. The return values are the Laplace eigenfunctions `phi`, and the
+        share the same basis. The return values are the Laplace eigenfunctions `phi`, and the
         square root of the power spectral density.
 
-        Correct results when using `prior_linearized` in tandem with `pm.set_data` and
+        Correct results when using `prior_linearized` in tandem with `pm.set_data` and
         `pm.MutableData` require two conditions. First, one must specify `L` instead of `c` when
-        the GP is constructed. If not, a RuntimeError is raised. Second, the `X` needs to be
-        zero centered, so its mean must be subtracted. An example is given below.
+        the GP is constructed. If not, a RuntimeError is raised. Second, the `X` needs to be
+        zero centered, so its mean must be subtracted. An example is given below.
 
         Parameters
         ----------
         X: array-like
             Function input values.
 
@@ -226,7 +226,7 @@ def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]):
             gp = pm.gp.HSGP(m=[200], L=[10], cov_func=cov_func)
 
             # Order is important. First calculate the mean, then make X a shared variable,
-            # then subtract the mean. When X is mutated later, the correct mean will be
+            # then subtract the mean. When X is mutated later, the correct mean will be
             # subtracted.
             X_mu = np.mean(X, axis=0)
             X = pm.MutableData("X", X)
             Xs = X - X_mu
@@ -240,7 +240,7 @@ def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]):
             beta = pm.Normal("beta", size=gp.m_star)
 
-            # The (non-centered) GP approximation is given by
+            # The (non-centered) GP approximation is given by
             f = pm.Deterministic("f", phi @ (beta * sqrt_psd))
 
             ...
 
@@ -265,13 +265,13 @@ def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]):
             )
 
         X, _ = self.cov_func._slice(X)
-
+
         self._set_boundary(X)
-
+
         phi = self._eigenvectors(X)
         omega = pt.sqrt(self._eigenvalues)
         psd = self.cov_func.power_spectral_density(omega)
-
+
         i = int(self.drop_first == True)
         return phi[:, i:], pt.sqrt(psd[i:])

From 47b0cf75b2b6813fb3fb262d3745698e66b53c02 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 4 Mar 2023 10:39:04 -0800
Subject: [PATCH 16/36] refactor to be usable by outside libraries, add tests

---
 pymc/gp/hsgp.py            | 135 +++++++++++++++----------
 pymc/tests/gp/test_gp.py   |  76 ---------------
 pymc/tests/gp/test_hsgp.py | 195 +++++++++++++++++++++++++++++++++++++
 3 files changed, 279 insertions(+), 127 deletions(-)
 create mode 100644 pymc/tests/gp/test_hsgp.py

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index fb0cace7ed..deadb5072e 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -12,6 +12,7 @@
 
 import warnings
 
+from types import ModuleType
 from typing import Optional, Sequence, Union
 
 import numpy as np
@@ -24,6 +25,73 @@
 from pymc.gp.mean import Mean, Zero
 
 
+def set_boundaries(
+    Xs: np.ndarray,
+    L: Optional[Sequence] = None,
+    c: Optional[Union[float, Sequence]] = None,
+    tl: ModuleType = np,
+):
+    """R Compute the boundary over which the approximation is accurate using the centered input data
+    locations, `Xs`. It is required that `Xs` has a mean at zero for each dimension, such that
+    `np.mean(Xs, axis=0) == [0, ..., 0]`. If `L` is provided, c is calculated instead. `tl` stands
+    for "tensor library", so the user can choose whether basic calculations are done with pytensor
+    or numpy.
+
+    Parameters
+    ----------
+    Xs: array-like
+        Function input values. Assumes they have been mean subtracted or centered at zero.
+    """
+
+    if tl.__name__ not in ("numpy", "pytensor.tensor"):
+        raise ValueError("tl must be either numpy or pytensor.tensor.")
+
+    S = tl.max(tl.abs(Xs), axis=0)
+
+    if L is None and c is not None:
+        L = c * S
+    elif c is None and L is not None:
+        c = L / S
+    else:
+        raise ValueError("At least one of `c` or `L` must be supplied.")
+
+    if tl.__name__ == "numpy":
+        L = tl.asarray(L)
+    elif tl.__name__ == "pytensor.tensor":
+        L = tl.as_tensor_variable(L)
+
+    return S, L, c
+
+
+def calc_eigenvalues(
+    L: Union[np.ndarray, pt.TensorConstant], m: Sequence[int], tl: ModuleType = np
+):
+    """R Calculate eigenvalues of the Laplacian."""
+    S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))])
+    S = np.vstack([s.flatten() for s in S]).T
+    return tl.square((np.pi * S) / (2 * L))
+
+
+def calc_eigenvectors(
+    Xs: Union[np.ndarray, pt.TensorVariable],
+    L: Union[np.ndarray, pt.TensorConstant],
+    eigvals: Union[np.ndarray, pt.TensorConstant],
+    m: Sequence[int],
+    tl: ModuleType = np,
+):
+    """R Calculate eigenvectors of the Laplacian. These are used as basis vectors in the HSGP
+    approximation. 
+ """ + m_star = int(np.prod(m)) + phi = tl.ones((Xs.shape[0], m_star)) + for d in range(len(m)): + c = 1.0 / tl.sqrt(L[d]) + term1 = tl.sqrt(eigvals[:, d]) + term2 = tl.tile(Xs[:, d][:, None], m_star) + L[d] + phi *= c * tl.sin(term1 * term2) + return phi + + class HSGP(Base): R""" Hilbert Space Gaussian process @@ -152,45 +220,12 @@ def __init__( self.m_star = int(np.prod(self.m)) self._boundary_set = False - # Record whether or not L instead of c was passed on construction. - self.L_given = self.L is not None - super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise NotImplementedError("Additive HSGPs aren't supported ") - def _set_boundary(self, Xs): - """Make L from X and c if L is not passed in.""" - if not self._boundary_set: - - self.S = pt.max(pt.abs(Xs), axis=0) - - if self.L is None: - self.L = self.c * self.S - - else: - self.c = self.L / self.S - self.L = pt.as_tensor_variable(self.L) - - self._boundary_set = True - - @property - def _eigenvalues(self): - S = np.meshgrid(*[np.arange(1, 1 + self.m[d]) for d in range(self.n_dims)]) - S = np.vstack([s.flatten() for s in S]).T - return pt.square((np.pi * S) / (2 * self.L)) - - def _eigenvectors(self, X): - phi = pt.ones((X.shape[0], self.m_star)) - for d in range(self.n_dims): - c = 1.0 / np.sqrt(self.L[d]) - term1 = pt.sqrt(self._eigenvalues[:, d]) - term2 = pt.tile(X[:, d][:, None], self.m_star) + self.L[d] - phi *= c * pt.sin(term1 * term2) - return phi - - def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]): + def prior_linearized(self, Xs: Union[np.ndarray, pt.TensorVariable]): """Returns the linearized version of the HSGP, the Laplace eigenfunctions and the square root of the power spectral density needed to create the GP. This function allows the user to bypass the GP interface and work directly with the basis and coefficients directly. @@ -201,13 +236,13 @@ def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]): Correct results when using `prior_linearized` in tandem with `pm.set_data` and `pm.MutableData` require two conditions. First, one must specify `L` instead of `c` when - the GP is constructed. If not, a RuntimeError is raised. Second, the `X` needs to be + the GP is constructed. If not, a RuntimeError is raised. Second, the `Xs` needs to be zero centered, so it's mean must be subtracted. An example is given below. Parameters ---------- - X: array-like - Function input values. + Xs: array-like + Function input values. Assumes they have been mean subtracted or centered at zero. Examples -------- @@ -233,7 +268,7 @@ def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]): Xs = X - X_mu # Pass the zero-subtracted Xs in to the GP - phi, sqrt_psd = gp.prior_linearized(Xs) + phi, sqrt_psd = gp.prior_linearized(Xs=Xs) # Specify standard normal prior in the coefficients. The number of which # is given by the number of basis vectors, which is also saved in the GP object @@ -256,20 +291,17 @@ def prior_linearized(self, X: Union[np.ndarray, pt.TensorVariable]): with model: ppc = pm.sample_posterior_predictive(idata, var_names=["f"]) """ - if not self.L_given: - warnings.warn( - "Since `c` was given instead of `L` on construction, usage of `model.set_data` " - "will result in an incorrect posterior predictive. Ignore this warning if you " - "dont plan on creating predictions this way. To remove this warning pass `L` " - "instead of `c` to the HSGP constructor." 
- ) - X, _ = self.cov_func._slice(X) + # Index Xs using input_dim and active_dims of covariance function + Xs, _ = self.cov_func._slice(Xs) - self._set_boundary(X) + # If not provided, use Xs and c to set L + S, self.L, self.c = set_boundaries(Xs, self.L, self.c, tl=pt) + self._boundary_set = True - phi = self._eigenvectors(X) - omega = pt.sqrt(self._eigenvalues) + eigvals = calc_eigenvalues(self.L, self.m, tl=pt) + phi = calc_eigenvectors(Xs, self.L, eigvals, self.m, tl=pt) + omega = pt.sqrt(eigvals) psd = self.cov_func.power_spectral_density(omega) i = int(self.drop_first == True) @@ -292,7 +324,7 @@ def prior(self, name: str, X: Union[np.ndarray, pt.TensorVariable], *args, **kwa phi, sqrt_psd = self.prior_linearized(X - self.X_mu) if self.parameterization == "noncentered": - self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=self.m_star) + self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=self.m_star - int(self.drop_first)) self.sqrt_psd = sqrt_psd f = self.mean_func(X) + phi @ (self.beta * self.sqrt_psd) @@ -312,7 +344,8 @@ def _build_conditional(self, Xnew): ) Xnew, _ = self.cov_func._slice(Xnew) - phi = self._eigenvectors(Xnew - X_mu) + eigvals = calc_eigenvalues(self.L, self.m, tl=pt) + phi = calc_eigenvectors(Xnew - X_mu, self.L, eigvals, self.m, tl=pt) i = int(self.drop_first == True) if self.parameterization == "noncentered": diff --git a/pymc/tests/gp/test_gp.py b/pymc/tests/gp/test_gp.py index ddfe4f2e3d..07d605eacd 100644 --- a/pymc/tests/gp/test_gp.py +++ b/pymc/tests/gp/test_gp.py @@ -534,79 +534,3 @@ def testMarginalKronRaises(self): gp2 = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) with pytest.raises(TypeError): gp1 + gp2 - - -class TestHSGP: - @pytest.fixture - def rng(self): - return np.random.RandomState(10) - - @pytest.fixture - def data(self, rng): - X = rng.randn(100, 1) - X1 = rng.randn(52, 1) - return X, X1 - - @pytest.fixture - def X(self, data): - return data[0] - - @pytest.fixture - def X1(self, data): - return data[1] - - @pytest.fixture - def model(self): - return pm.Model() - - @pytest.fixture - def cov_func(self): - return pm.gp.cov.ExpQuad(1, ls=0.1) - - @pytest.fixture - def gp(self, cov_func): - gp = pm.gp.HSGP(m=[500], c=4.0, cov_func=cov_func) - return gp - - def test_prior(self, model, gp, X): - # TODO: improve mathematical side of tests - # So far I just check interfaces are the same for latent and HSGP - with model: - f1 = gp.prior("f1", X) - assert pm.draw(f1).shape == (X.shape[0],) - assert ~np.isnan(pm.draw(f1)).any() - - def test_conditional(self, model, gp, X, X1): - # TODO: improve mathematical side of tests - # So far I just check interfaces are the same for latent and HSGP - with model: - with pytest.raises(ValueError, match="Prior is not set"): - gp.conditional("f1", X1) - gp.prior("f1", X) - gp.conditional("f2", X1) - - def test_parametrization_m(self): - cov_func = pm.gp.cov.ExpQuad(1, ls=0.1) - with pytest.raises( - ValueError, match="must be sequences with one element per active dimension" - ): - pm.gp.HSGP(m=500, c=4.0, cov_func=cov_func) - - with pytest.raises( - ValueError, match="must be sequences with one element per active dimension" - ): - pm.gp.HSGP(m=[500, 12], c=4.0, cov_func=cov_func) - - def test_parametrization_L(self): - cov_func = pm.gp.cov.ExpQuad(1, ls=0.1) - with pytest.raises( - ValueError, match="must be sequences with one element per active dimension" - ): - pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func) - - @pytest.mark.parametrize("drop_first", [True, False]) - def 
test_parametrization_drop_first(self, model, cov_func, X, drop_first): - with model: - gp = pm.gp.HSGP(m=[500], c=4.0, cov_func=cov_func, drop_first=drop_first) - gp.prior("f1", X) - assert model.f1_hsgp_coeffs_.type.shape == (500 - drop_first,) diff --git a/pymc/tests/gp/test_hsgp.py b/pymc/tests/gp/test_hsgp.py new file mode 100644 index 0000000000..b6748dc716 --- /dev/null +++ b/pymc/tests/gp/test_hsgp.py @@ -0,0 +1,195 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import arviz as az +import numpy as np +import pytensor +import pytensor.tensor as pt +import pytest +import scipy as sp + +from scipy.spatial import distance + +import pymc as pm + + +def build_mmd_func(sample1, sample2): + + assert sample1.shape[1] == sample2.shape[1] + + s1 = pt.matrix(name="s1", shape=sample1.shape) + s2 = pt.matrix(name="s2", shape=sample2.shape) + + X = np.concatenate((sample1, sample2), axis=0) + test_ell = np.median(distance.pdist(X)) / 2 + + K = pm.gp.cov.ExpQuad(sample1.shape[1], ls=test_ell) + Kxx = K(s1) + Kyy = K(s2) + Kxy = K(s1, s2) + + n_x, n_y = s1.shape[0], s2.shape[0] + mmd = ( + (pt.sum(Kxx) / (n_x * (n_x - 1))) + + (pt.sum(Kyy) / (n_y * (n_y - 1))) + - 2 * pt.sum(Kxy) / (n_x * n_y) + ) + + calc_mmd = pytensor.function(inputs=[s1, s2], outputs=mmd) + return calc_mmd + + +def two_sample_test(sample1, sample2, n_sims=1000, alpha=0.05): + """https://torchdrift.org/notebooks/note_on_mmd.html""" + # build function to calculate mmd + calc_mmd = build_mmd_func(sample1, sample2) + + # simulate test statistic under null hypothesis + X = np.concatenate((sample1, sample2), axis=0) + half_N = int(X.shape[0] // 2) + ix = np.arange(half_N * 2) + + h0 = [] + for i in range(n_sims): + np.random.shuffle(ix) + X = X[ix, :] + h0.append(calc_mmd(X[:half_N, :], X[half_N:, :])) + h0 = np.asarray(h0) + critical_value = np.percentile(h0, 100 * (1 - alpha)) + mmd = calc_mmd(sample1, sample2) + return h0, mmd, critical_value, mmd > critical_value + + +class TestHSGP: + @pytest.fixture + def rng(self): + return np.random.RandomState(10) + + @pytest.fixture + def data(self, rng): + # 1D dataset + X1 = np.linspace(-5, 5, 100)[:, None] + + # 3D dataset + x1, x2, x3 = np.meshgrid( + np.linspace(0, 10, 5), np.linspace(20, 30, 5), np.linspace(10, 20, 5) + ) + X2 = np.vstack([x1.flatten(), x2.flatten(), x3.flatten()]).T + return X1, X2 + + @pytest.fixture + def X1(self, data): + return data[0] + + @pytest.fixture + def X2(self, data): + return data[1] + + @pytest.fixture + def model(self): + return pm.Model() + + @pytest.fixture + def cov_func(self): + return pm.gp.cov.ExpQuad(1, ls=1) + + @pytest.fixture + def gp(self, cov_func): + gp = pm.gp.Latent(cov_func=cov_func) + return gp + + def test_set_boundaries_1d(self, X1): + X1s = X1 - np.mean(X1, axis=0) + S, L, c = pm.gp.hsgp.set_boundaries(X1s, c=2) + assert np.all(S == 5) + assert np.all(L == 10) + assert c == 2 + + def test_set_boundaries_3d(self, X2): + X2s = X2 - np.mean(X2, axis=0) + S, L, c = pm.gp.hsgp.set_boundaries(X2s, c=2) + assert np.all(S == 5) + assert 
np.all(L == 10) + assert c == 2 + + def test_set_boundaries_exceptions(self, X1): + X1s = X1 - np.mean(X1, axis=0) + + with pytest.raises(ValueError, match="At least one of"): + pm.gp.hsgp.set_boundaries(X1s, tl=np) + + with pytest.raises(ValueError, match="tl must be either numpy or pytensor.tensor"): + pm.gp.hsgp.set_boundaries(X1s, c=2, tl=sp) + + def test_parametrization(self): + err_msg = "`m` and L, if provided, must be sequences with one element per active dimension" + + with pytest.raises(ValueError, match=err_msg): + # m must be a list + cov_func = pm.gp.cov.ExpQuad(1, ls=0.1) + pm.gp.HSGP(m=500, c=2, cov_func=cov_func) + + with pytest.raises(ValueError, match=err_msg): + # m must have same length as L + cov_func = pm.gp.cov.ExpQuad(2, ls=[1, 2]) + pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func) + + with pytest.raises(ValueError, match=err_msg): + # m must have same length as L, and match number of active dims of cov_func + cov_func = pm.gp.cov.ExpQuad(1, ls=0.1) + pm.gp.HSGP(m=[500], L=[12, 12], cov_func=cov_func) + + # pass without error, cov_func has 2 active dimensions, c given as scalar + cov_func = pm.gp.cov.ExpQuad(3, ls=[1, 2], active_dims=[0, 2]) + pm.gp.HSGP(m=[50, 50], c=2, cov_func=cov_func) + + # pass without error, all have two dimensions + cov_func = pm.gp.cov.ExpQuad(2, ls=[1, 2]) + pm.gp.HSGP(m=[50, 50], L=[12, 12], cov_func=cov_func) + + @pytest.mark.parametrize("drop_first", [True, False]) + def test_parametrization_drop_first(self, model, cov_func, X1, drop_first): + n_basis = 100 + with model: + gp = pm.gp.HSGP(m=[n_basis], c=4.0, cov_func=cov_func, drop_first=drop_first) + gp.prior("f1", X1) + + n_coeffs = model.f1_hsgp_coeffs_.type.shape[0] + if drop_first: + assert ( + n_coeffs == n_basis - 1 + ), f"one basis vector should have been dropped, {n_coeffs}" + else: + assert n_coeffs == n_basis, "one was dropped when it shouldn't have been" + + @pytest.mark.parametrize("parameterization", ["centered", "noncentered"]) + def test_prior(self, model, cov_func, X1, parameterization): + """Compare HSGP to unapproximated GP, pm.gp.Latent. Draw samples from the prior and + compare them using MMD two sample test. Tests both centered and non-centered + parameterizations. + """ + with model: + hsgp = pm.gp.HSGP(m=[200], c=2.0, parameterization=parameterization, cov_func=cov_func) + f1 = hsgp.prior("f1", X=X1) + + gp = pm.gp.Latent(cov_func=cov_func) + f2 = gp.prior("f2", X=X1) + + idata = pm.sample_prior_predictive(samples=1000) + + samples1 = az.extract(idata.prior["f1"])["f1"].values.T + samples2 = az.extract(idata.prior["f2"])["f2"].values.T + + h0, mmd, critical_value, reject = two_sample_test( + samples1, samples2, n_sims=500, alpha=0.05 + ) + assert not reject, "H0 was rejected, even though HSGP and GP priors should match." 
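For context on the testing approach added in this patch: `two_sample_test` is a
permutation-based kernel MMD (maximum mean discrepancy) test, so the HSGP prior can be
compared against the exact `pm.gp.Latent` prior without assuming any parametric form for the
samples. The following compact NumPy/SciPy sketch shows the same logic (illustrative only,
not the patch code; it uses the simpler biased MMD estimator rather than the n(n-1)
normalization used above):

import numpy as np
from scipy.spatial import distance


def mmd2(x, y, ell):
    # Biased squared MMD estimate with an RBF kernel of lengthscale ell.
    z = np.concatenate((x, y), axis=0)
    K = np.exp(-distance.cdist(z, z, "sqeuclidean") / (2 * ell**2))
    n = len(x)
    kxx, kyy, kxy = K[:n, :n], K[n:, n:], K[:n, n:]
    return kxx.mean() + kyy.mean() - 2 * kxy.mean()


rng = np.random.default_rng(1)
x = rng.normal(size=(100, 1))
y = rng.normal(size=(100, 1))  # drawn from the same distribution as x
ell = np.median(distance.pdist(np.concatenate((x, y)))) / 2  # median heuristic, as in the patch

observed = mmd2(x, y, ell)

# Null distribution: shuffle the pooled samples and recompute the statistic.
pooled = np.concatenate((x, y), axis=0)
null = []
for _ in range(500):
    rng.shuffle(pooled)
    null.append(mmd2(pooled[:100], pooled[100:], ell))

critical = np.percentile(null, 95)
print("reject H0:", observed > critical)  # expected False, since x and y match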
From 2a56db7afea114c48c703cc0436353e04c49ff86 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 4 Mar 2023 18:33:01 -0800
Subject: [PATCH 17/36] update copyright to 2023

---
 pymc/gp/__init__.py        |  2 +-
 pymc/gp/gp.py              |  2 +-
 pymc/gp/hsgp.py            |  2 ++
 pymc/gp/mean.py            |  2 +-
 pymc/gp/util.py            |  2 +-
 pymc/tests/gp/test_hsgp.py | 34 +++++++++++++++++++++++++++++---
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/pymc/gp/__init__.py b/pymc/gp/__init__.py
index 995a5f9b87..6706b0e9f0 100644
--- a/pymc/gp/__init__.py
+++ b/pymc/gp/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2020 The PyMC Developers
+# Copyright 2023 The PyMC Developers
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py
index 261c21bc89..e6bae18456 100644
--- a/pymc/gp/gp.py
+++ b/pymc/gp/gp.py
@@ -1,4 +1,4 @@
-# Copyright 2020 The PyMC Developers
+# Copyright 2023 The PyMC Developers
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index deadb5072e..a01c809cc6 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -1,3 +1,5 @@
+# Copyright 2023 The PyMC Developers
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/pymc/gp/mean.py b/pymc/gp/mean.py
index 0370b12937..ccde742e99 100644
--- a/pymc/gp/mean.py
+++ b/pymc/gp/mean.py
@@ -1,4 +1,4 @@
-# Copyright 2020 The PyMC Developers
+# Copyright 2023 The PyMC Developers
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/pymc/gp/util.py b/pymc/gp/util.py
index 0ebf728849..a677a25593 100644
--- a/pymc/gp/util.py
+++ b/pymc/gp/util.py
@@ -1,4 +1,4 @@
-# Copyright 2020 The PyMC Developers
+# Copyright 2023 The PyMC Developers
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/pymc/tests/gp/test_hsgp.py b/pymc/tests/gp/test_hsgp.py
index b6748dc716..01281dbd9a 100644
--- a/pymc/tests/gp/test_hsgp.py
+++ b/pymc/tests/gp/test_hsgp.py
@@ -1,3 +1,5 @@
+# Copyright 2023 The PyMC Developers
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -23,6 +25,7 @@
 
 
 def build_mmd_func(sample1, sample2):
+    """Build a PyTensor function that calculates the maximum mean discrepancy (MMD) statistic."""
 
     assert sample1.shape[1] == sample2.shape[1]
 
@@ -49,7 +52,11 @@ def build_mmd_func(sample1, sample2):
 
 
 def two_sample_test(sample1, sample2, n_sims=1000, alpha=0.05):
-    """https://torchdrift.org/notebooks/note_on_mmd.html"""
+    """Calculate a test whose null hypothesis is that two sets of samples were drawn from
+    the same distribution.
+
+    Largely taken from https://torchdrift.org/notebooks/note_on_mmd.html
+    """
     # build function to calculate mmd
     calc_mmd = build_mmd_func(sample1, sample2)
 
@@ -173,8 +180,8 @@ def test_parametrization_drop_first(self, model, cov_func, X1, drop_first):
 
     @pytest.mark.parametrize("parameterization", ["centered", "noncentered"])
     def test_prior(self, model, cov_func, X1, parameterization):
-        """Compare HSGP to unapproximated GP, pm.gp.Latent. Draw samples from the prior and
-        compare them using MMD two sample test. Tests both centered and non-centered
+        """Compare HSGP prior to unapproximated GP prior, pm.gp.Latent. Draw samples from the
+        prior and compare them using an MMD two-sample test. Tests both centered and non-centered
         parameterizations.
         """
         with model:
@@ -193,3 +200,24 @@ def test_prior(self, model, cov_func, X1, parameterization):
             samples1, samples2, n_sims=500, alpha=0.05
         )
         assert not reject, "H0 was rejected, even though HSGP and GP priors should match."
+
+    @pytest.mark.parametrize("parameterization", ["centered", "noncentered"])
+    def test_conditional(self, model, cov_func, X1, parameterization):
+        """Compare HSGP conditional to unapproximated GP prior, pm.gp.Latent. Draw samples from the
+        prior and compare them using an MMD two-sample test. Tests both centered and non-centered
+        parameterizations. The conditional should match the prior when no data is observed.
+        """
+        with model:
+            hsgp = pm.gp.HSGP(m=[100], c=2.0, parameterization=parameterization, cov_func=cov_func)
+            f = hsgp.prior("f", X=X1)
+            fc = hsgp.conditional("fc", Xnew=X1)
+
+            idata = pm.sample_prior_predictive(samples=1000)
+
+        samples1 = az.extract(idata.prior["f"])["f"].values.T
+        samples2 = az.extract(idata.prior["fc"])["fc"].values.T
+
+        h0, mmd, critical_value, reject = two_sample_test(
+            samples1, samples2, n_sims=500, alpha=0.05
+        )
+        assert not reject, "H0 was rejected, even though HSGP prior and conditional should match."

From 1616e73ff5672cc9387c51d7a7916442ef0ac094 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 4 Mar 2023 18:34:31 -0800
Subject: [PATCH 18/36] add error messages to cov psd tests

---
 pymc/gp/cov.py            | 14 +++++---------
 pymc/tests/gp/test_cov.py | 17 ++++++++++++-----
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index 558419fc8e..1d419c2988 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -1,4 +1,4 @@
-# Copyright 2020 The PyMC Developers
+# Copyright 2023 The PyMC Developers
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,11 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import numbers
 import warnings
 
 from collections import Counter
 from functools import reduce
-from numbers import Number
 from operator import add, mul
 from typing import Optional, Sequence
 
@@ -57,7 +57,7 @@ def _verify_scalar(value):
         return at.squeeze(value)
     elif np.asarray(value).squeeze().shape == ():
         return np.squeeze(value)
-    elif isinstance(value, Number):
+    elif isinstance(value, numbers.Real):
         return value
 
     raise ValueError("A scalar value is required.")
@@ -95,12 +95,8 @@ def full(self, X, Xs=None):
     def __add__(self, other):
         # If it's a scalar, cast as Constant covariance. This allows validation for power spectral
        # density calc.
-        if isinstance(other, Number):
-            try:
-                other = Constant(c=_verify_scalar(other))
-            except ValueError:
-                pass
-
+        if isinstance(other, numbers.Real):
+            other = Constant(c=other)
         return Add([self, other])
 
     def __mul__(self, other):
diff --git a/pymc/tests/gp/test_cov.py b/pymc/tests/gp/test_cov.py
index aba96067c8..822e620db4 100644
--- a/pymc/tests/gp/test_cov.py
+++ b/pymc/tests/gp/test_cov.py
@@ -1,4 +1,4 @@
-# Copyright 2020 The PyMC Developers
+# Copyright 2023 The PyMC Developers
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -202,7 +202,8 @@ def test_copsd_multiply(self):
         cov1 = 2 * pm.gp.cov.ExpQuad(1, ls=1)
         cov2 = pm.gp.cov.ExpQuad(1, ls=1)
 
-        with pytest.raises(NotImplementedError):
+        msg = "The power spectral density of products of covariance functions is not implemented"
+        with pytest.raises(NotImplementedError, match=msg):
             psd = (cov1 * cov2).power_spectral_density(omega[:, None]).eval()
 
     def test_covpsd_nonstationary1(self):
@@ -211,7 +212,8 @@ def test_covpsd_nonstationary1(self):
         with pm.Model() as model:
             cov = 2 * pm.gp.cov.Linear(1, c=5)
 
-        with pytest.raises(ValueError):
+        msg = "can only be calculated for `Stationary` covariance functions."
+        with pytest.raises(ValueError, match=msg):
             psd = cov.power_spectral_density(omega[:, None]).eval()
 
     def test_covpsd_nonstationary2(self):
@@ -220,7 +222,11 @@ def test_covpsd_nonstationary2(self):
         with pm.Model() as model:
             cov = 2 * pm.gp.cov.ExpQuad(1, ls=1) + 10.0
 
-        with pytest.raises(ValueError):
+        # Even though this should error, this isn't the appropriate message. The actual problem
+        # is because the covariance function is non-stationary. The underlying bug is due to
+        # `Constant` covariances not having an input_dim.
+        msg = "All covariances must have the same `input_dim`."
+        with pytest.raises(ValueError, match=msg):
             psd = cov.power_spectral_density(omega[:, None]).eval()
 
     def test_covpsd_notimplemented(self):
@@ -232,7 +238,8 @@ class NewStationaryCov(pm.gp.cov.Stationary):
         with pm.Model() as model:
             cov = 2 * NewStationaryCov(1, ls=1)
 
-        with pytest.raises(NotImplementedError):
+        msg = "No power spectral density method has been implemented"
+        with pytest.raises(NotImplementedError, match=msg):
             psd = cov.power_spectral_density(omega[:, None]).eval()

From 9afbe7ce60a708e046ac7c7fbf51bcd49dea5447 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 4 Mar 2023 19:19:06 -0800
Subject: [PATCH 19/36] only error if neither c nor L is passed, both is OK.
also fix precommit issue with cov.py

---
 pymc/gp/cov.py  | 4 ----
 pymc/gp/hsgp.py | 3 ++-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index 1d419c2988..2867c68d6f 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -236,7 +236,6 @@ def _merge_factors_cov(self, X, Xs=None, diag=False):
         """
         factor_list = []
         for factor in self._factor_list:
-
             # make sure diag=True is handled properly
             if isinstance(factor, BaseCovariance):
                 factor_list.append(factor(X, Xs, diag))
@@ -255,7 +254,6 @@ def _merge_factors_cov(self, X, Xs=None, diag=False):
                     TensorSharedVariable,
                 ),
             ):
-
                 if factor.ndim == 2 and diag:
                     factor_list.append(at.diag(factor))
                 else:
@@ -275,7 +273,6 @@ def _merge_factors_psd(self, omega):
         factor_list = []
         for factor in self._factor_list:
             if isinstance(factor, Covariance):
-
                 # Allow merging covariances for psd only if active_dims are the same
                 if set(self.active_dims) != set(factor.active_dims):
                     raise ValueError(
@@ -288,7 +285,6 @@ def _merge_factors_psd(self, omega):
 
                     factor_list.append(factor.power_spectral_density(omega))
 
                 except (AttributeError, NotImplementedError) as e:
-
                     if isinstance(factor, Stationary):
                         raise NotImplementedError(
                             f"No power spectral density method has been implemented for {factor}."
diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index a01c809cc6..c765cc7863 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -54,8 +54,9 @@ def set_boundaries(
         L = c * S
     elif c is None and L is not None:
         c = L / S
-    else:
+    elif c is None and L is None:
         raise ValueError("At least one of `c` or `L` must be supplied.")
+    # if both are passed, L takes precedence.
 
     if tl.__name__ == "numpy":
         L = tl.asarray(L)

From a12df20c8666bbed0e6c745b4949fe3137600607 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 4 Mar 2023 21:22:04 -0800
Subject: [PATCH 20/36] fail to fix mypy errors

---
 pymc/gp/hsgp.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index c765cc7863..42d56b6647 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -15,7 +15,7 @@
 
 import warnings
 
 from types import ModuleType
-from typing import Optional, Sequence, Union
+from typing import Optional, Sequence, Tuple, Union
 
 import numpy as np
 import pytensor.tensor as pt
@@ -26,13 +26,16 @@
 from pymc.gp.mean import Mean, Zero
 
+TensorVariable = Union[np.ndarray, pt.TensorVariable]
+TensorConstant = Union[np.ndarray, pt.TensorConstant]
+
 
 def set_boundaries(
-    Xs: np.ndarray,
+    Xs: TensorVariable,
     L: Optional[Sequence] = None,
     c: Optional[Union[float, Sequence]] = None,
     tl: ModuleType = np,
-):
+) -> Tuple[TensorVariable, TensorVariable, Union[TensorConstant, float]]:
     """R Compute the boundary over which the approximation is accurate using the centered input data
     locations, `Xs`. It is required that `Xs` has a mean at zero for each dimension, such that
     `np.mean(Xs, axis=0) == [0, ..., 0]`. If `L` is provided, c is calculated instead. `tl` stands
`tl` stands @@ -66,19 +69,19 @@ def set_boundaries( return S, L, c -def calc_eigenvalues( - L: Union[np.ndarray, pt.TensorConstant], m: Sequence[int], tl: ModuleType = np -): +def calc_eigenvalues(L: TensorConstant, m: Sequence[int], tl: ModuleType = np): """R Calculate eigenvalues of the Laplacian.""" S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))]) S = np.vstack([s.flatten() for s in S]).T + top = np.pi * S + bot = 2 * L return tl.square((np.pi * S) / (2 * L)) def calc_eigenvectors( - Xs: Union[np.ndarray, pt.TensorVariable], - L: Union[np.ndarray, pt.TensorConstant], - eigvals: Union[np.ndarray, pt.TensorConstant], + Xs: TensorVariable, + L: TensorConstant, + eigvals: TensorConstant, m: Sequence[int], tl: ModuleType = np, ): @@ -228,7 +231,7 @@ def __init__( def __add__(self, other): raise NotImplementedError("Additive HSGPs aren't supported ") - def prior_linearized(self, Xs: Union[np.ndarray, pt.TensorVariable]): + def prior_linearized(self, Xs: TensorVariable): """Returns the linearized version of the HSGP, the Laplace eigenfunctions and the square root of the power spectral density needed to create the GP. This function allows the user to bypass the GP interface and work directly with the basis and coefficients directly. @@ -310,7 +313,7 @@ def prior_linearized(self, Xs: Union[np.ndarray, pt.TensorVariable]): i = int(self.drop_first == True) return phi[:, i:], pt.sqrt(psd[i:]) - def prior(self, name: str, X: Union[np.ndarray, pt.TensorVariable], *args, **kwargs): + def prior(self, name: str, X: TensorVariable, *args, **kwargs): R""" Returns the (approximate) GP prior distribution evaluated over the input locations `X`. @@ -357,7 +360,7 @@ def _build_conditional(self, Xnew): elif self.parameterization == "centered": return self.mean_func(Xnew) + phi[:, i:] @ beta - def conditional(self, name: str, Xnew: Union[np.ndarray, pt.TensorVariable], *args, **kwargs): + def conditional(self, name: str, Xnew: TensorVariable, *args, **kwargs): R""" Returns the (approximate) conditional distribution evaluated over new input locations `Xnew`. 

From 5218a2595906fadf7a2d3ee2d0c309bba42eaa7a Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 4 Mar 2023 22:23:47 -0800
Subject: [PATCH 21/36] clean up

---
 pymc/gp/hsgp.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index 42d56b6647..d68c5648c9 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -73,8 +73,6 @@ def calc_eigenvalues(L: TensorConstant, m: Sequence[int], tl: ModuleType = np):
     """R Calculate eigenvalues of the Laplacian."""
     S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))])
     S = np.vstack([s.flatten() for s in S]).T
-    top = np.pi * S
-    bot = 2 * L
     return tl.square((np.pi * S) / (2 * L))

From 133b7835903e4ea49267e835c8fb6b1dae945378 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sun, 5 Mar 2023 20:42:22 -0800
Subject: [PATCH 22/36] Update pymc/gp/hsgp.py

Co-authored-by: Michael Osthege
---
 pymc/gp/hsgp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index d68c5648c9..36f1f87bb1 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -52,6 +52,7 @@ def set_boundaries(
         raise ValueError("tl must be either numpy or pytensor.tensor.")
 
     S = tl.max(tl.abs(Xs), axis=0)
+    assert isinstance(S, (np.ndarray, TensorVariable))
 
     if L is None and c is not None:
         L = c * S

From cb6c01f465162ff8c4cd32c8675f7abc5b469c70 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sun, 5 Mar 2023 20:43:42 -0800
Subject: [PATCH 23/36] Update pymc/gp/hsgp.py

Co-authored-by: Michael Osthege
---
 pymc/gp/hsgp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index 36f1f87bb1..cc6f1c05b9 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -73,8 +73,8 @@ def set_boundaries(
 def calc_eigenvalues(L: TensorConstant, m: Sequence[int], tl: ModuleType = np):
     """R Calculate eigenvalues of the Laplacian."""
     S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))])
-    S = np.vstack([s.flatten() for s in S]).T
-    return tl.square((np.pi * S) / (2 * L))
+    Sarr = np.vstack([s.flatten() for s in S]).T
+    return tl.square((np.pi * Sarr) / (2 * L))

From d0e8feb187be3a1b373fafb63c56f53171606e77 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Mon, 6 Mar 2023 00:43:39 -0800
Subject: [PATCH 24/36] fix mypy errors

---
 pymc/gp/hsgp.py | 143 ++++++++++++++++++------------------
 tests/gp/test_hsgp.py | 17 +----
 2 files changed, 64 insertions(+), 96 deletions(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index cc6f1c05b9..5e58dd4fc3 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -15,7 +15,7 @@
 
 import warnings
 
 from types import ModuleType
-from typing import Optional, Sequence, Tuple, Union
+from typing import Optional, Sequence, Union
 
 import numpy as np
 import pytensor.tensor as pt
@@ -30,51 +30,20 @@
 TensorConstant = Union[np.ndarray, pt.TensorConstant]
 
 
-def set_boundaries(
-    Xs: TensorVariable,
-    L: Optional[Sequence] = None,
-    c: Optional[Union[float, Sequence]] = None,
-    tl: ModuleType = np,
-) -> Tuple[TensorVariable, TensorVariable, Union[TensorConstant, float]]:
-    """R Compute the boundary over which the approximation is accurate using the centered input data
-    locations, `Xs`. It is required that `Xs` has a mean at zero for each dimension, such that
-    `np.mean(Xs, axis=0) == [0, ..., 0]`. If `L` is provided, c is calculated instead. `tl` stands
-    for "tensor library", so the user can choose whether basic calculations are done with pytensor
-    or numpy.
-
-    Parameters
-    ----------
-    Xs: array-like
-        Function input values.
Assumes they have been mean subtracted or centered at zero.
+def set_boundary(Xs: TensorVariable, c: Union[float, TensorVariable]) -> TensorVariable:
+    """Set the boundary using the mean-subtracted `Xs` and `c`. `c` is usually a scalar
+    multiplier greater than 1.0, but it may be one value per dimension or column of `Xs`.
     """
-
-    if tl.__name__ not in ("numpy", "pytensor.tensor"):
-        raise ValueError("tl must be either numpy or pytensor.tensor.")
-
-    S = tl.max(tl.abs(Xs), axis=0)
-    assert isinstance(S, (np.ndarray, TensorVariable))
-
-    if L is None and c is not None:
-        L = c * S
-    elif c is None and L is not None:
-        c = L / S
-    elif c is None and L is None:
-        raise ValueError("At least one of `c` or `L` must be supplied.")
-    # if both are passed, L takes precedent.
-
-    if tl.__name__ == "numpy":
-        L = tl.asarray(L)
-    elif tl.__name__ == "pytensor.tensor":
-        L = tl.as_tensor_variable(L)
-
-    return S, L, c
+    S = pt.max(pt.abs(Xs), axis=0)
+    L = c * S
+    return L
 
 
 def calc_eigenvalues(L: TensorConstant, m: Sequence[int], tl: ModuleType = np):
-    """R Calculate eigenvalues of the Laplacian."""
+    """Calculate eigenvalues of the Laplacian."""
     S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))])
-    Sarr = np.vstack([s.flatten() for s in S]).T
-    return tl.square((np.pi * Sarr) / (2 * L))
+    S_arr = np.vstack([s.flatten() for s in S]).T
+    return tl.square((np.pi * S_arr) / (2 * L))
 
 
 def calc_eigenvectors(
     Xs: TensorVariable,
     L: TensorConstant,
     eigvals: TensorConstant,
     m: Sequence[int],
     tl: ModuleType = np,
 ):
-    """R Calculate eigenvectors of the Laplacian. These are used as basis vectors in the HSGP
+    """Calculate eigenvectors of the Laplacian. These are used as basis vectors in the HSGP
     approximation.
     """
     m_star = int(np.prod(m))
@@ -202,33 +171,36 @@ def __init__(
         if L is not None and (not isinstance(L, Sequence) or len(L) != cov_func.n_dims):
             raise ValueError(arg_err_msg)
-        elif L is not None:
-            L = tuple(L)
 
         if L is None and c is not None and c < 1.2:
-            warnings.warn(
-                "Most applications will require `c >= 1.2` for accuracy at the boundaries of the "
-                "domain."
-            )
+            warnings.warn("For an adequate approximation `c >= 1.2` is recommended.")
 
-        parameterization = parameterization.lower().replace("-", "")
+        parameterization = parameterization.lower().replace("-", "").replace("_", "")
         if parameterization not in ["centered", "noncentered"]:
             raise ValueError("`parameterization` must be either 'centered' or 'noncentered'.")
         else:
-            self.parameterization = parameterization
+            self._parameterization = parameterization
 
-        self.drop_first = drop_first
-        self.L = L
-        self.m = m
-        self.c = c
-        self.n_dims = cov_func.n_dims
-        self.m_star = int(np.prod(self.m))
-        self._boundary_set = False
+        self._drop_first = drop_first
+        self._m = m
+        self._m_star = int(np.prod(self._m))
+        self._L = L
+        self._c = c
 
         super().__init__(mean_func=mean_func, cov_func=cov_func)
 
     def __add__(self, other):
-        raise NotImplementedError("Additive HSGPs aren't supported ")
+        raise NotImplementedError("Additive HSGPs aren't supported.")
+
+    @property
+    def L(self):
+        if self._L is None:
+            raise RuntimeError("Boundaries `L` required but still unset.")
+        return self._L
+
+    @L.setter
+    def L(self, value):
+        self._L = value
 
     def prior_linearized(self, Xs: TensorVariable):
         """Returns the linearized version of the HSGP, the Laplace eigenfunctions and the square
@@ -268,9 +240,9 @@ def prior_linearized(self, Xs: TensorVariable):
         # Order is important. First calculate the mean, then make X a shared variable,
         # then subtract the mean.
When X is mutated later, the correct mean will be # subtracted. - X_mu = np.mean(X, axis=0) + X_mean = np.mean(X, axis=0) X = pm.MutableData("X", X) - Xs = X - X_mu + Xs = X - X_mean # Pass the zero-subtracted Xs in to the GP phi, sqrt_psd = gp.prior_linearized(Xs=Xs) @@ -301,15 +273,18 @@ def prior_linearized(self, Xs: TensorVariable): Xs, _ = self.cov_func._slice(Xs) # If not provided, use Xs and c to set L - S, self.L, self.c = set_boundaries(Xs, self.L, self.c, tl=pt) - self._boundary_set = True + if self._L is None: + assert isinstance(self._c, (float, np.ndarray, pt.TensorVariable)) + self.L = set_boundary(Xs, self._c) + else: + self.L = self._L - eigvals = calc_eigenvalues(self.L, self.m, tl=pt) - phi = calc_eigenvectors(Xs, self.L, eigvals, self.m, tl=pt) + eigvals = calc_eigenvalues(self.L, self._m, tl=pt) + phi = calc_eigenvectors(Xs, self.L, eigvals, self._m, tl=pt) omega = pt.sqrt(eigvals) psd = self.cov_func.power_spectral_density(omega) - i = int(self.drop_first == True) + i = int(self._drop_first == True) return phi[:, i:], pt.sqrt(psd[i:]) def prior(self, name: str, X: TensorVariable, *args, **kwargs): @@ -325,38 +300,44 @@ def prior(self, name: str, X: TensorVariable, *args, **kwargs): dims: None Dimension name for the GP random variable. """ - self.X_mu = pt.mean(X, axis=0) - phi, sqrt_psd = self.prior_linearized(X - self.X_mu) + self._X_mean = pt.mean(X, axis=0) + phi, sqrt_psd = self.prior_linearized(X - self._X_mean) - if self.parameterization == "noncentered": - self.beta = pm.Normal(f"{name}_hsgp_coeffs_", size=self.m_star - int(self.drop_first)) - self.sqrt_psd = sqrt_psd - f = self.mean_func(X) + phi @ (self.beta * self.sqrt_psd) + if self._parameterization == "noncentered": + self._beta = pm.Normal( + f"{name}_hsgp_coeffs_", size=self._m_star - int(self._drop_first) + ) + self._sqrt_psd = sqrt_psd + f = self.mean_func(X) + phi @ (self._beta * self._sqrt_psd) - elif self.parameterization == "centered": - self.beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd) - f = self.mean_func(X) + phi @ self.beta + elif self._parameterization == "centered": + self._beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd) + f = self.mean_func(X) + phi @ self._beta self.f = pm.Deterministic(name, f, dims=kwargs.get("dims")) return self.f def _build_conditional(self, Xnew): try: - beta, X_mu = self.beta, self.X_mu + beta, X_mean = self._beta, self._X_mean + + if self._parameterization == "noncentered": + sqrt_psd = self._sqrt_psd + except AttributeError: raise ValueError( "Prior is not set, can't create a conditional. Call `.prior(name, X)` first." 
) Xnew, _ = self.cov_func._slice(Xnew) - eigvals = calc_eigenvalues(self.L, self.m, tl=pt) - phi = calc_eigenvectors(Xnew - X_mu, self.L, eigvals, self.m, tl=pt) - i = int(self.drop_first == True) + eigvals = calc_eigenvalues(self.L, self._m, tl=pt) + phi = calc_eigenvectors(Xnew - X_mean, self.L, eigvals, self._m, tl=pt) + i = int(self._drop_first == True) - if self.parameterization == "noncentered": - return self.mean_func(Xnew) + phi[:, i:] @ (beta * self.sqrt_psd) + if self._parameterization == "noncentered": + return self.mean_func(Xnew) + phi[:, i:] @ (beta * sqrt_psd) - elif self.parameterization == "centered": + elif self._parameterization == "centered": return self.mean_func(Xnew) + phi[:, i:] @ beta def conditional(self, name: str, Xnew: TensorVariable, *args, **kwargs): diff --git a/tests/gp/test_hsgp.py b/tests/gp/test_hsgp.py index 01281dbd9a..ed2f318e5d 100644 --- a/tests/gp/test_hsgp.py +++ b/tests/gp/test_hsgp.py @@ -116,26 +116,13 @@ def gp(self, cov_func): def test_set_boundaries_1d(self, X1): X1s = X1 - np.mean(X1, axis=0) - S, L, c = pm.gp.hsgp.set_boundaries(X1s, c=2) - assert np.all(S == 5) + L = pm.gp.hsgp.set_boundary(X1s, c=2).eval() assert np.all(L == 10) - assert c == 2 def test_set_boundaries_3d(self, X2): X2s = X2 - np.mean(X2, axis=0) - S, L, c = pm.gp.hsgp.set_boundaries(X2s, c=2) - assert np.all(S == 5) + L = pm.gp.hsgp.set_boundary(X2s, c=2).eval() assert np.all(L == 10) - assert c == 2 - - def test_set_boundaries_exceptions(self, X1): - X1s = X1 - np.mean(X1, axis=0) - - with pytest.raises(ValueError, match="At least one of"): - pm.gp.hsgp.set_boundaries(X1s, tl=np) - - with pytest.raises(ValueError, match="tl must be either numpy or pytensor.tensor"): - pm.gp.hsgp.set_boundaries(X1s, c=2, tl=sp) def test_parametrization(self): err_msg = "`m` and L, if provided, must be sequences with one element per active dimension" From 1abedbb9312724f7568a60e5f0353c82e6767e44 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 6 Mar 2023 01:01:52 -0800 Subject: [PATCH 25/36] as_tensor_variable --- pymc/gp/hsgp.py | 10 +++++----- tests/gp/test_hsgp.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py index 5e58dd4fc3..9d8a7e3859 100644 --- a/pymc/gp/hsgp.py +++ b/pymc/gp/hsgp.py @@ -27,7 +27,6 @@ from pymc.gp.mean import Mean, Zero TensorVariable = Union[np.ndarray, pt.TensorVariable] -TensorConstant = Union[np.ndarray, pt.TensorConstant] def set_boundary(Xs: TensorVariable, c: Union[float, TensorVariable]) -> TensorVariable: @@ -39,17 +38,18 @@ def set_boundary(Xs: TensorVariable, c: Union[float, TensorVariable]) -> TensorV return L -def calc_eigenvalues(L: TensorConstant, m: Sequence[int], tl: ModuleType = np): +def calc_eigenvalues(L: TensorVariable, m: Sequence[int], tl: ModuleType = np): """Calculate eigenvalues of the Laplacian.""" S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))]) S_arr = np.vstack([s.flatten() for s in S]).T + print(S_arr.shape, L.shape) return tl.square((np.pi * S_arr) / (2 * L)) def calc_eigenvectors( Xs: TensorVariable, - L: TensorConstant, - eigvals: TensorConstant, + L: TensorVariable, + eigvals: TensorVariable, m: Sequence[int], tl: ModuleType = np, ): @@ -200,7 +200,7 @@ def L(self): @L.setter def L(self, value): - self._L = value + self._L = pt.as_tensor_variable(value) def prior_linearized(self, Xs: TensorVariable): """Returns the linearized version of the HSGP, the Laplace eigenfunctions and the square diff --git a/tests/gp/test_hsgp.py b/tests/gp/test_hsgp.py 
index ed2f318e5d..125b23cc96 100644
--- a/tests/gp/test_hsgp.py
+++ b/tests/gp/test_hsgp.py
@@ -184,7 +184,7 @@ def test_prior(self, model, cov_func, X1, parameterization):
         samples2 = az.extract(idata.prior["f2"])["f2"].values.T
 
         h0, mmd, critical_value, reject = two_sample_test(
-            samples1, samples2, n_sims=500, alpha=0.05
+            samples1, samples2, n_sims=500, alpha=0.01
         )
         assert not reject, "H0 was rejected, even though HSGP and GP priors should match."
 
@@ -205,6 +205,6 @@ def test_conditional(self, model, cov_func, X1, parameterization):
         samples2 = az.extract(idata.prior["fc"])["fc"].values.T
 
         h0, mmd, critical_value, reject = two_sample_test(
-            samples1, samples2, n_sims=500, alpha=0.05
+            samples1, samples2, n_sims=500, alpha=0.01
         )
         assert not reject, "H0 was rejected, even though HSGP prior and conditional should match."

From 6a1de804ef8c82b86bee97c9dc722abcbd8d50f5 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Mon, 6 Mar 2023 01:02:44 -0800
Subject: [PATCH 26/36] remove print

---
 pymc/gp/hsgp.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index 9d8a7e3859..5f86deb1a4 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -42,7 +42,6 @@ def calc_eigenvalues(L: TensorVariable, m: Sequence[int], tl: ModuleType = np):
     """Calculate eigenvalues of the Laplacian."""
     S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))])
     S_arr = np.vstack([s.flatten() for s in S]).T
-    print(S_arr.shape, L.shape)
     return tl.square((np.pi * S_arr) / (2 * L))

From ed612c3451b055017bc73911967d95babf18127d Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Mon, 6 Mar 2023 15:45:04 -0800
Subject: [PATCH 27/36] add test_hsgp.py to test runner

---
 .github/workflows/tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 670f5895c2..4f80fb2f86 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -75,6 +75,7 @@ jobs:
       - |
         tests/distributions/test_timeseries.py
         tests/gp/test_cov.py
+        tests/gp/test_hsgp.py
         tests/gp/test_gp.py
         tests/gp/test_mean.py
         tests/gp/test_util.py

From ac288b963d33c93c57a77346aaa45e9ab4c615b6 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 11 Mar 2023 12:28:32 -0800
Subject: [PATCH 28/36] Apply suggestions from code review

Co-authored-by: Michael Osthege

---
 pymc/gp/cov.py  |  4 +++-
 pymc/gp/hsgp.py | 34 +++++++++++++++++++---------------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index 2867c68d6f..f0e633ddbe 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -265,7 +265,9 @@ def _merge_factors_cov(self, X, Xs=None, diag=False):
         return factor_list
 
     def _merge_factors_psd(self, omega):
-        """Called to evaluatate spectral densities of combination kernels when possible. Implements
+        """Called to evaluate spectral densities of combination kernels when possible.
+
+        Implements
         a more restricted set of rules than `_merge_factors_cov` -- just additivity of stationary
         covariances with defined power spectral densities and multiplication by scalars. Also, the
         active_dims for all covariances in the sum must be the same.
diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index 5f86deb1a4..df4b1acfff 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -78,7 +78,7 @@ class HSGP(Base):
     additive covariances.
 
     For information on choosing appropriate `m`, `L`, and `c`, refer Ruitort-Mayol et. al. or to
-    the pymc examples that use HSGP.
+    the PyMC examples that use HSGP.
     Parameters
     ----------
@@ -110,15 +110,17 @@ class HSGP(Base):
         X = np.random.rand(100, 3)
 
         with pm.Model() as model:
-            # Specify the covariance function. Three input dimensions, but we only want to use the
-            # last two.
+            # Specify the covariance function.
+            # Three input dimensions, but we only want to use the last two.
             cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2])
 
-            # Specify the HSGP. Use 25 basis vectors across each active dimension, [1, 2] for a
-            # total of 25 * 25 = 625. The value `c = 4` means the boundary of the approximation
-            # lies at four times the half width of the data. In this example the data lie between
-            # zero and one, so the boundaries occur at -1.5 and 2.5. The data, both for training
-            # and prediction should reside well within that boundary..
+            # Specify the HSGP.
+            # Use 25 basis vectors across each active dimension for a total of 25 * 25 = 625.
+            # The value `c = 4` means the boundary of the approximation
+            # lies at four times the half width of the data.
+            # In this example the data lie between zero and one,
+            # so the boundaries occur at -1.5 and 2.5. The data, both for
+            # training and prediction, should reside well within that boundary.
             gp = pm.gp.HSGP(m=[25, 25], c=4.0, cov_func=cov_func)
 
             # Place a GP prior over the function f.
@@ -136,10 +138,10 @@ class HSGP(Base):
     References
     ----------
     -   Ruitort-Mayol, G., and Anderson, M., and Solin, A., and Vehtari, A. (2022). Practical
-        Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming
+        Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming
 
     -   Solin, A., Sarkka, S. (2019) Hilbert Space Methods for Reduced-Rank Gaussian Process
-        Regression.
+        Regression.
     """
 
     def __init__(

From 33af5b3133101187baea8c69baa2a275b04bb5d3 Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 11 Mar 2023 12:29:23 -0800
Subject: [PATCH 29/36] allow c to be int or float

---
 pymc/gp/hsgp.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp.py
index 5f86deb1a4..93c6537b7e 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import numbers
 import warnings
 
 from types import ModuleType
@@ -146,7 +147,7 @@ def __init__(
         self,
         m: Sequence[int],
         L: Optional[Sequence[float]] = None,
-        c: Optional[float] = None,
+        c: Optional[numbers.Real] = None,
         drop_first: bool = False,
         parameterization="noncentered",
         *,
@@ -273,7 +274,7 @@ def prior_linearized(self, Xs: TensorVariable):
 
         # If not provided, use Xs and c to set L
         if self._L is None:
-            assert isinstance(self._c, (float, np.ndarray, pt.TensorVariable))
+            assert isinstance(self._c, (numbers.Real, np.ndarray, pt.TensorVariable))
             self.L = set_boundary(Xs, self._c)
         else:
             self.L = self._L

From 2abbed76e6e0e2d0b452086df41af48bf071c8aa Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Sat, 11 Mar 2023 15:29:36 -0800
Subject: [PATCH 30/36] updates from review

---
 .github/workflows/tests.yml                   |  2 +-
 docs/source/api/gp/implementations.rst        |  1 +
 pymc/gp/__init__.py                           |  2 +-
 pymc/gp/cov.py                                |  2 -
 pymc/gp/{hsgp.py => hsgp_approx.py}           | 44 ++++++++++---------
 .../gp/{test_hsgp.py => test_hsgp_approx.py}  |  4 +-
 6 files changed, 28 insertions(+), 27 deletions(-)
 rename pymc/gp/{hsgp.py => hsgp_approx.py} (89%)
 rename tests/gp/{test_hsgp.py => test_hsgp_approx.py} (98%)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 4f80fb2f86..d4dab5c899 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -75,7 +75,7 @@ jobs:
       - |
         tests/distributions/test_timeseries.py
         tests/gp/test_cov.py
-        tests/gp/test_hsgp.py
+        tests/gp/test_hsgp_approx.py
         tests/gp/test_gp.py
         tests/gp/test_mean.py
         tests/gp/test_util.py
diff --git a/docs/source/api/gp/implementations.rst b/docs/source/api/gp/implementations.rst
index 3b9f972845..03b59bbf9c 100644
--- a/docs/source/api/gp/implementations.rst
+++ b/docs/source/api/gp/implementations.rst
@@ -6,6 +6,7 @@ Implementations
 .. autosummary::
    :toctree: generated
 
+   HSGP
    Latent
    LatentKron
    Marginal
diff --git a/pymc/gp/__init__.py b/pymc/gp/__init__.py
index 6706b0e9f0..99c8023398 100644
--- a/pymc/gp/__init__.py
+++ b/pymc/gp/__init__.py
@@ -22,4 +22,4 @@
     MarginalKron,
     MarginalSparse,
 )
-from pymc.gp.hsgp import HSGP
+from pymc.gp.hsgp_approx import HSGP
diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index f0e633ddbe..e765588590 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -57,8 +57,6 @@ def _verify_scalar(value):
         return at.squeeze(value)
     elif np.asarray(value).squeeze().shape == ():
         return np.squeeze(value)
-    elif isinstance(value, numbers.Real):
-        return value
 
     raise ValueError("A scalar value is required.")
diff --git a/pymc/gp/hsgp.py b/pymc/gp/hsgp_approx.py
similarity index 89%
rename from pymc/gp/hsgp.py
rename to pymc/gp/hsgp_approx.py
index 6e230528f1..224e1e6a5c 100644
--- a/pymc/gp/hsgp.py
+++ b/pymc/gp/hsgp_approx.py
@@ -30,7 +30,7 @@
 TensorVariable = Union[np.ndarray, pt.TensorVariable]
 
 
-def set_boundary(Xs: TensorVariable, c: Union[float, TensorVariable]) -> TensorVariable:
+def set_boundary(Xs: TensorVariable, c: Union[numbers.Real, TensorVariable]) -> TensorVariable:
     """Set the boundary using the mean-subtracted `Xs` and `c`. `c` is usually a scalar
     multiplier greater than 1.0, but it may be one value per dimension or column of `Xs`.
     """
@@ -68,19 +68,22 @@ class HSGP(Base):
     R"""
-    Hilbert Space Gaussian process
+    Hilbert Space Gaussian process approximation.
 
     The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process.
It is a reduced rank GP approximation that uses a fixed set of basis vectors whose coefficients
     are random functions of a stationary covariance function's power spectral density.  It's usage
     is largely similar to `gp.Latent`.  Like `gp.Latent`, it does not assume a Gaussian noise
     model and can be used with any likelihood, or as a component anywhere within a model.  Also like
-    `gp.Latent`, it has `prior` and `conditional` methods.  It supports a limited subset of
-    additive covariances.
+    `gp.Latent`, it has `prior` and `conditional` methods.  It supports any sum of covariance
+    functions that implement a `power_spectral_density` method.
 
     For information on choosing appropriate `m`, `L`, and `c`, refer Ruitort-Mayol et. al. or to
     the PyMC examples that use HSGP.
 
+    To work with the HSGP in its "linearized" form, as a matrix of basis vectors and a vector of
+    coefficients, see the method `prior_linearized`.
+
     Parameters
     ----------
     m: list
@@ -205,20 +208,19 @@ def L(self, value):
         self._L = pt.as_tensor_variable(value)
 
     def prior_linearized(self, Xs: TensorVariable):
-        """Returns the linearized version of the HSGP, the Laplace eigenfunctions and the square
-        root of the power spectral density needed to create the GP. This function allows the user
-        to bypass the GP interface and work directly with the basis and coefficients directly.
-        This format allows the user to create predictions using `pm.set_data` similarly to a
-        linear model. It also enables computational speed ups in multi-GP models since they may
-        share the same basis. The return values are the Laplace eigenfunctions `phi`, and the
-        square root of the power spectral density.
+        """Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root
+        of the power spectral density needed to create the GP.
+
+        This function allows the user to bypass the GP interface and work directly with the basis
+        and coefficients. This format allows the user to create predictions using `pm.set_data`
+        similarly to a linear model. It also enables computational speed ups in multi-GP models
+        since they may share the same basis. The return values are the Laplace eigenfunctions
+        `phi`, and the square root of the power spectral density.
 
         Correct results when using `prior_linearized` in tandem with `pm.set_data` and
         `pm.MutableData` require two conditions. First, one must specify `L` instead of `c` when
         the GP is constructed. If not, a RuntimeError is raised. Second, the `Xs` needs to be
-        zero centered, so it's mean must be subtracted. An example is given below.
+        zero-centered, so its mean must be subtracted. An example is given below.
 
-    def prior(self, name: str, X: TensorVariable, *args, **kwargs):
+    def prior(self, name: str, X: TensorVariable, dims: Optional[str] = None):
         R"""
         Returns the (approximate) GP prior distribution evaluated over the input locations `X`.
 
         Parameters
         ----------
-        name: string
+        name: str
             Name of the random variable
         X: array-like
             Function input values.
@@ -318,7 +320,7 @@ def prior(self, name: str, X: TensorVariable, *args, **kwargs): self._beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd) f = self.mean_func(X) + phi @ self._beta - self.f = pm.Deterministic(name, f, dims=kwargs.get("dims")) + self.f = pm.Deterministic(name, f, dims=dims) return self.f def _build_conditional(self, Xnew): @@ -344,10 +346,10 @@ def _build_conditional(self, Xnew): elif self._parameterization == "centered": return self.mean_func(Xnew) + phi[:, i:] @ beta - def conditional(self, name: str, Xnew: TensorVariable, *args, **kwargs): + def conditional(self, name: str, Xnew: TensorVariable, dims: Optional[str] = None): R""" Returns the (approximate) conditional distribution evaluated over new input locations - `Xnew`. If using the + `Xnew`. Parameters ---------- @@ -355,8 +357,8 @@ def conditional(self, name: str, Xnew: TensorVariable, *args, **kwargs): Name of the random variable Xnew : array-like Function input values. - **kwargs : dict, optional - Optional arguments such as `dims`. + dims: None + Dimension name for the GP random variable. """ fnew = self._build_conditional(Xnew) - return pm.Deterministic(name, fnew, dims=kwargs.get("dims")) + return pm.Deterministic(name, fnew, dims=dims) diff --git a/tests/gp/test_hsgp.py b/tests/gp/test_hsgp_approx.py similarity index 98% rename from tests/gp/test_hsgp.py rename to tests/gp/test_hsgp_approx.py index 125b23cc96..b6f03a4acc 100644 --- a/tests/gp/test_hsgp.py +++ b/tests/gp/test_hsgp_approx.py @@ -116,12 +116,12 @@ def gp(self, cov_func): def test_set_boundaries_1d(self, X1): X1s = X1 - np.mean(X1, axis=0) - L = pm.gp.hsgp.set_boundary(X1s, c=2).eval() + L = pm.gp.hsgp_approx.set_boundary(X1s, c=2).eval() assert np.all(L == 10) def test_set_boundaries_3d(self, X2): X2s = X2 - np.mean(X2, axis=0) - L = pm.gp.hsgp.set_boundary(X2s, c=2).eval() + L = pm.gp.hsgp_approx.set_boundary(X2s, c=2).eval() assert np.all(L == 10) def test_parametrization(self): From f9bacac731db07ed465dd39050a23651b74afd6f Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Sat, 11 Mar 2023 16:01:19 -0800 Subject: [PATCH 31/36] added mypy ignores --- pymc/gp/hsgp_approx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index 224e1e6a5c..3175793c60 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -293,7 +293,7 @@ def prior_linearized(self, Xs: TensorVariable): i = int(self._drop_first == True) return phi[:, i:], pt.sqrt(psd[i:]) - def prior(self, name: str, X: TensorVariable, dims: Optional[str] = None): + def prior(self, name: str, X: TensorVariable, dims: Optional[str] = None): # type: ignore R""" Returns the (approximate) GP prior distribution evaluated over the input locations `X`. @@ -346,7 +346,7 @@ def _build_conditional(self, Xnew): elif self._parameterization == "centered": return self.mean_func(Xnew) + phi[:, i:] @ beta - def conditional(self, name: str, Xnew: TensorVariable, dims: Optional[str] = None): + def conditional(self, name: str, Xnew: TensorVariable, dims: Optional[str] = None): # type: ignore R""" Returns the (approximate) conditional distribution evaluated over new input locations `Xnew`. 
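Before the next rename patch, it is worth pausing on what the helpers in this series actually compute. The following is a minimal NumPy sketch of the one-dimensional approximation behind `set_boundary`, `calc_eigenvalues`, and `calc_eigenvectors`; the `expquad_psd` helper and all constants are illustrative assumptions, not code from any commit.

.. code:: python

    import numpy as np

    def expquad_psd(omega, ell):
        # D=1 case of the ExpQuad power spectral density defined in pymc/gp/cov.py
        return np.sqrt(2.0 * np.pi) * ell * np.exp(-0.5 * (ell * omega) ** 2)

    x = np.linspace(-1.0, 1.0, 200)[:, None]  # inputs, already centered at zero
    m, c = 30, 1.5
    L = c * np.max(np.abs(x))                 # boundary, mirroring set_boundary
    j = np.arange(1, m + 1)
    eigvals = (np.pi * j / (2.0 * L)) ** 2    # Laplacian eigenvalues, as in calc_eigenvalues
    phi = np.sqrt(1.0 / L) * np.sin(np.sqrt(eigvals) * (x + L))  # as in calc_eigenvectors
    # HSGP approximation of the Gram matrix: phi @ diag(psd(sqrt(eigvals))) @ phi.T
    K_approx = phi @ (expquad_psd(np.sqrt(eigvals), ell=0.2)[:, None] * phi.T)

With these settings, `K_approx` should closely match the exact ExpQuad Gram matrix away from the boundary; increasing `m` or `c` tightens the agreement.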
From da2f1f1da0677dc071724b8e2b4231cf8f7251bf Mon Sep 17 00:00:00 2001
From: Bill Engels
Date: Mon, 13 Mar 2023 17:37:55 -0700
Subject: [PATCH 32/36] rename TensorVariable type to TensorLike

---
 pymc/gp/hsgp_approx.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py
index 3175793c60..8536d990ef 100644
--- a/pymc/gp/hsgp_approx.py
+++ b/pymc/gp/hsgp_approx.py
@@ -27,10 +27,10 @@
 from pymc.gp.gp import Base
 from pymc.gp.mean import Mean, Zero
 
-TensorVariable = Union[np.ndarray, pt.TensorVariable]
+TensorLike = Union[np.ndarray, pt.TensorVariable]
 
 
-def set_boundary(Xs: TensorVariable, c: Union[numbers.Real, TensorVariable]) -> TensorVariable:
+def set_boundary(Xs: TensorLike, c: Union[numbers.Real, TensorLike]) -> TensorLike:
     """Set the boundary using the mean-subtracted `Xs` and `c`. `c` is usually a scalar
     multiplier greater than 1.0, but it may be one value per dimension or column of `Xs`.
     """
@@ -39,7 +39,7 @@ def set_boundary(Xs: TensorLike, c: Union[numbers.Real, TensorLike]) -> TensorLike:
     return L
 
 
-def calc_eigenvalues(L: TensorVariable, m: Sequence[int], tl: ModuleType = np):
+def calc_eigenvalues(L: TensorLike, m: Sequence[int], tl: ModuleType = np):
     """Calculate eigenvalues of the Laplacian."""
     S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(len(m))])
     S_arr = np.vstack([s.flatten() for s in S]).T
@@ -47,9 +47,9 @@ def calc_eigenvalues(L: TensorLike, m: Sequence[int], tl: ModuleType = np):
 
 
 def calc_eigenvectors(
-    Xs: TensorVariable,
-    L: TensorVariable,
-    eigvals: TensorVariable,
+    Xs: TensorLike,
+    L: TensorLike,
+    eigvals: TensorLike,
     m: Sequence[int],
     tl: ModuleType = np,
 ):
@@ -207,7 +207,7 @@ def L(self, value):
         self._L = pt.as_tensor_variable(value)
 
-    def prior_linearized(self, Xs: TensorVariable):
+    def prior_linearized(self, Xs: TensorLike):
         """Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root
         of the power spectral density needed to create the GP.
 
@@ -293,7 +293,7 @@ def prior_linearized(self, Xs: TensorLike):
         i = int(self._drop_first == True)
         return phi[:, i:], pt.sqrt(psd[i:])
 
-    def prior(self, name: str, X: TensorVariable, dims: Optional[str] = None):  # type: ignore
+    def prior(self, name: str, X: TensorLike, dims: Optional[str] = None):  # type: ignore
         R"""
         Returns the (approximate) GP prior distribution evaluated over the input locations `X`.
 
@@ -346,7 +346,7 @@ def _build_conditional(self, Xnew):
         elif self._parameterization == "centered":
             return self.mean_func(Xnew) + phi[:, i:] @ beta
 
-    def conditional(self, name: str, Xnew: TensorVariable, dims: Optional[str] = None):  # type: ignore
+    def conditional(self, name: str, Xnew: TensorLike, dims: Optional[str] = None):  # type: ignore
         R"""
         Returns the (approximate) conditional distribution evaluated over new input locations
         `Xnew`.
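A usage note on the renamed helper: because `set_boundary` broadcasts `c` against the per-column maxima of `Xs`, both a scalar `c` and one value per column work, as its docstring states. A small sketch of both forms (values are arbitrary; the shape assertion follows from broadcasting):

.. code:: python

    import numpy as np
    import pymc as pm

    Xs = np.random.uniform(-5, 5, size=(100, 2))
    Xs = Xs - Xs.mean(axis=0)  # set_boundary expects mean-subtracted inputs

    L_scalar = pm.gp.hsgp_approx.set_boundary(Xs, c=2.0).eval()
    L_percol = pm.gp.hsgp_approx.set_boundary(Xs, c=np.array([1.5, 3.0])).eval()
    assert L_scalar.shape == L_percol.shape == (2,)  # one boundary per column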
From 5a51214f87759ed8cd3933f8fbea32043574d52d Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 13 Mar 2023 17:39:29 -0700 Subject: [PATCH 33/36] remove _verify_scalar_, take ricardos suggestion for fixing Exponential kernel creation --- pymc/gp/cov.py | 5 ++++- tests/gp/test_cov.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index e765588590..f511ae05b8 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -107,7 +107,10 @@ def __rmul__(self, other): return self.__mul__(other) def __pow__(self, other): - return Exponentiated(self, _verify_scalar(other)) + other = at.as_tensor_variable(other).squeeze() + if not other.ndim == 0: + raise ValueError("A covariance function can only be exponentiated by a scalar value") + return Exponentiated(self, other) def __array_wrap__(self, result): """ diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py index 822e620db4..608e484532 100644 --- a/tests/gp/test_cov.py +++ b/tests/gp/test_cov.py @@ -290,7 +290,9 @@ def test_covexp_shared(self): def test_invalid_covexp(self): X = np.linspace(0, 1, 10)[:, None] - with pytest.raises(ValueError, match=r"scalar value is required"): + with pytest.raises( + ValueError, match=r"A covariance function can only be exponentiated by a scalar value" + ): with pm.Model() as model: a = np.array([[1.0, 2.0]]) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a From 04d8c0dd282c1e3378c34a3b94406b0b7e055758 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 13 Mar 2023 17:57:08 -0700 Subject: [PATCH 34/36] add returns docstring for prior_linearized --- pymc/gp/hsgp_approx.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index 8536d990ef..adca16f600 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -227,6 +227,15 @@ def prior_linearized(self, Xs: TensorLike): Xs: array-like Function input values. Assumes they have been mean subtracted or centered at zero. + Returns + ------- + phi: array-like + Either Numpy or PyTensor 2D array of the fixed basis vectors. There are n rows, one + per row of `Xs` and `prod(m)` columns, one for each basis vector. + sqrt_psd: array-like + Either a Numpy or PyTensor 1D array of the square roots of the power spectral + densities. + Examples -------- .. code:: python @@ -296,6 +305,7 @@ def prior_linearized(self, Xs: TensorLike): def prior(self, name: str, X: TensorLike, dims: Optional[str] = None): # type: ignore R""" Returns the (approximate) GP prior distribution evaluated over the input locations `X`. + For usage examples, refer to `pm.gp.Latent`. 
Parameters ---------- From 626598c22455bb6d73202b98f14c27183a3ac35b Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 13 Mar 2023 18:54:46 -0700 Subject: [PATCH 35/36] fix precommit --- pymc/gp/cov.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index c9c5fdc002..e69367af42 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -21,7 +21,6 @@ from typing import Optional, Sequence import numpy as np -import pytensor import pytensor.tensor as pt from pytensor.graph.basic import Variable From ba7859f2c9dd2ccffbef4ed9c05c3b8f444876ce Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Mon, 13 Mar 2023 20:16:25 -0700 Subject: [PATCH 36/36] at -> pt --- pymc/gp/cov.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index e69367af42..134c3f207e 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -175,9 +175,9 @@ def _slice(self, X, Xs=None): " the number of columns to use. Ignore otherwise.", UserWarning, ) - X = at.as_tensor_variable(X[:, self.active_dims]) + X = pt.as_tensor_variable(X[:, self.active_dims]) if Xs is not None: - Xs = at.as_tensor_variable(Xs[:, self.active_dims]) + Xs = pt.as_tensor_variable(Xs[:, self.active_dims]) return X, Xs @@ -573,10 +573,10 @@ def power_spectral_density(self, omega): (\sqrt(2 \pi)^D \prod_{i}^{D}\ell_i \exp\left( -\frac{1}{2} \sum_{i}^{D}\ell_i^2 \omega_i^{2} \right) """ - ls = at.ones(self.n_dims) * self.ls - c = at.power(at.sqrt(2.0 * np.pi), self.n_dims) - exp = at.exp(-0.5 * at.dot(at.square(omega), at.square(ls))) - return c * at.prod(ls) * exp + ls = pt.ones(self.n_dims) * self.ls + c = pt.power(pt.sqrt(2.0 * np.pi), self.n_dims) + exp = pt.exp(-0.5 * pt.dot(pt.square(omega), pt.square(ls))) + return c * pt.prod(ls) * exp class RatQuad(Stationary): @@ -628,17 +628,17 @@ def power_spectral_density(self, omega): \prod_{i=1}^{D}\ell_{i} \left(5 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+5}{2}} """ - ls = at.ones(self.n_dims) * self.ls + ls = pt.ones(self.n_dims) * self.ls D52 = (self.n_dims + 5) / 2 num = ( - at.power(2, self.n_dims) - * at.power(np.pi, self.n_dims / 2) - * at.gamma(D52) - * at.power(5, 5 / 2) + pt.power(2, self.n_dims) + * pt.power(np.pi, self.n_dims / 2) + * pt.gamma(D52) + * pt.power(5, 5 / 2) ) - den = 0.75 * at.sqrt(np.pi) - pow = at.power(5.0 + at.dot(at.square(omega), at.square(ls)), -1 * D52) - return (num / den) * at.prod(ls) * pow + den = 0.75 * pt.sqrt(np.pi) + pow = pt.power(5.0 + pt.dot(pt.square(omega), pt.square(ls)), -1 * D52) + return (num / den) * pt.prod(ls) * pow class Matern32(Stationary): @@ -668,17 +668,17 @@ def power_spectral_density(self, omega): \prod_{i=1}^{D}\ell_{i} \left(3 + \sum_{i=1}^{D}\ell_{i}^2 \boldsymbol\omega_{i}^{2}\right)^{-\frac{D+3}{2}} """ - ls = at.ones(self.n_dims) * self.ls + ls = pt.ones(self.n_dims) * self.ls D32 = (self.n_dims + 3) / 2 num = ( - at.power(2, self.n_dims) - * at.power(np.pi, self.n_dims / 2) - * at.gamma(D32) - * at.power(3, 3 / 2) + pt.power(2, self.n_dims) + * pt.power(np.pi, self.n_dims / 2) + * pt.gamma(D32) + * pt.power(3, 3 / 2) ) - den = 0.5 * at.sqrt(np.pi) - pow = at.power(3.0 + at.dot(at.square(omega), at.square(ls)), -1 * D32) - return (num / den) * at.prod(ls) * pow + den = 0.5 * pt.sqrt(np.pi) + pow = pt.power(3.0 + pt.dot(pt.square(omega), pt.square(ls)), -1 * D32) + return (num / den) * pt.prod(ls) * pow class Matern12(Stationary):
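The closing `at` -> `pt` rename is mechanical, but the power spectral densities it touches are easy to sanity-check numerically. A sketch comparing `ExpQuad.power_spectral_density` against the closed form quoted in its docstring, for a single input dimension (the lengthscale and tolerance below are arbitrary choices, not values from the patches):

.. code:: python

    import numpy as np
    import pymc as pm

    ell = 0.7
    omega = np.linspace(0.0, 10.0, 50)[:, None]  # one column per active dimension

    psd = pm.gp.cov.ExpQuad(1, ls=ell).power_spectral_density(omega).eval()
    expected = np.sqrt(2.0 * np.pi) * ell * np.exp(-0.5 * ell**2 * omega.flatten() ** 2)
    np.testing.assert_allclose(psd, expected, rtol=1e-6)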