Finish HSGP #61

Closed · wants to merge 12 commits
7 changes: 2 additions & 5 deletions pymc_experimental/gp/__init__.py
@@ -1,5 +1,2 @@
- from pymc_experimental.gp.latent_approx import (
-     HSGP,
-     KarhunenLoeveExpansion,
-     ProjectedProcess,
- )
+ from pymc_experimental.gp.hsgp import HSGP
+ from pymc_experimental.gp.latent_approx import KarhunenLoeveExpansion, ProjectedProcess
195 changes: 195 additions & 0 deletions pymc_experimental/gp/hsgp.py
@@ -0,0 +1,195 @@
import warnings

import aesara.tensor as at
import numpy as np
import pymc as pm


class HSGP(pm.gp.gp.Base):
R"""
Hilbert Space Gaussian process

The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process. This
approximation is a linear model that uses a fixed set of basis vectors, whose coeficients are
random functions of a stationary covariance function's power spectral density. Like
`gp.Latent`, it does not assume a Gaussian noise model and can be used with any likelihood or as
a component anywhere within a model. Also like `gp.Latent`, it has `prior` and `conditional`
methods. It additonally has an `approx_K` method which returns the approximate covariance
matrix. It supports a limited subset of additive covariances.
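
    Following Solin and Sarkka (2019), the prior this class constructs is, as a sketch of the
    underlying linear model,

    .. math::

        f(x) \approx \sum_{j} \sqrt{S\left(\sqrt{\lambda_j}\right)} \, \phi_j(x) \, \beta_j \,,
        \quad \beta_j \sim \text{N}(0, 1) \,,

    where :math:`S` is the power spectral density of the covariance function, and
    :math:`\lambda_j` and :math:`\phi_j` are the eigenvalues and eigenfunctions of the
    Laplace operator on the domain :math:`[-L, L]`.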

    For information on choosing appropriate `m`, `L`, and `c`, refer to Ruitort-Mayol et al.
    or to the PyMC examples documentation.

    Parameters
    ----------
    m: list
        The number of basis vectors to use for each active dimension (covariance parameter
        `active_dims`).
    L: list
        The boundary of the space for each active dimension.  It is called the boundary
        condition.  Choose `L` such that the domain `[-L, L]` contains all points in the
        column of X given by the active dimension.
    c: float
        The proportion extension factor, 1.5 by default.  Used to construct `L` from X.
        Defined as `S = max|X|` such that `X` is in `[-S, S]`.  `L` is then calculated as
        `c * S`.  One of `c` or `L` must be provided.  Further information can be found in
        Ruitort-Mayol et al.
    cov_func: None, 2D array, or instance of Covariance
        The covariance function.  Defaults to zero.
    mean_func: None, instance of Mean
        The mean function.  Defaults to zero.

    Examples
    --------
    .. code:: python

        # A matrix of 100 three-dimensional input points.
        X = np.random.randn(100, 3)

        with pm.Model() as model:
            # Specify the covariance function.  Three input dimensions, but we only want
            # to use the last two.
            cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2])

            # Specify the HSGP.  Use 10 basis vectors across each active dimension, [1, 2],
            # for a total of 10 * 10 = 100.  The input X is normally distributed, so use a
            # boundary condition that should easily contain all the points, from -6 to 6
            # in each dimension.
            gp = pmx.gp.HSGP(m=[10, 10], L=[6, 6], cov_func=cov_func)

            # Place a GP prior over the function f.
            f = gp.prior("f", X=X)

        ...

        # After fitting or sampling, specify the distribution at new points with
        # .conditional.
        Xnew = np.linspace(-1, 2, 50)[:, None]

        with model:
            fcond = gp.conditional("fcond", Xnew=Xnew)

    References
    ----------
    -   Ruitort-Mayol, G., Anderson, M., Solin, A., and Vehtari, A. (2022).  Practical
        Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming.

    -   Solin, A., and Sarkka, S. (2019).  Hilbert Space Methods for Reduced-Rank Gaussian
        Process Regression.
    """

    def __init__(
        self,
        m,
        L=None,
        c=1.5,
        *,
        mean_func=pm.gp.mean.Zero(),
        cov_func=pm.gp.cov.Constant(0.0),
    ):
        arg_err_msg = (
            "`m` and `L`, if provided, must be lists or tuples, with one element per active "
            "dimension."
        )
        try:
            if len(m) != cov_func.D:
                raise ValueError(arg_err_msg)
        except TypeError as e:
            raise ValueError(arg_err_msg) from e

Review comment (Member): Do covariance functions have a `D` attribute?

Review comment (Member): Is this supposed to be `len(active_dims)`?

        if L is not None and len(L) != cov_func.D:
            raise ValueError(arg_err_msg)

        if L is None and c < 1.2:
            warnings.warn(
                "Most applications will require a `c >= 1.2` for accuracy at the boundaries "
                "of the domain."
            )

        self.m = m
        self.L = L
        self.c = c
        self.D = cov_func.D

        super().__init__(mean_func=mean_func, cov_func=cov_func)

    def __add__(self, other):
        raise NotImplementedError("Additive HSGPs aren't supported.")

    def _set_boundary(self, X):
        """Make `L` from `X` and `c` if `L` is not passed in."""
        if self.L is None:
            # Define new L based on c and X range
            La = at.abs(at.min(X, axis=0))
            Lb = at.abs(at.max(X, axis=0))
            self.L = self.c * at.max(at.stack((La, Lb)), axis=0)
        else:
            self.L = at.as_tensor_variable(self.L)

    @staticmethod
    def _eigendecomposition(X, L, m, D):
        """Construct the eigenvalues and eigenfunctions of the Laplace operator."""
        m_star = at.prod(m)
        S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(D)])
        S = np.vstack([s.flatten() for s in S]).T
        eigvals = at.square((np.pi * S) / (2 * L))
        phi = at.ones((X.shape[0], m_star))
        for d in range(D):
            c = 1.0 / np.sqrt(L[d])
            phi *= c * at.sin(at.sqrt(eigvals[:, d]) * (at.tile(X[:, d][:, None], m_star) + L[d]))
        omega = at.sqrt(eigvals)
        return omega, phi, m_star

    def approx_K(self, X, L, m):
        """A helper function which returns the approximate covariance matrix
        `K = phi @ diag(psd) @ phi.T`.  This can be helpful for checking how closely the
        approximation matches the exact covariance.
        """
        X, _ = self.cov_func._slice(X)
        omega, phi, _ = self._eigendecomposition(X, L, m, self.cov_func.D)
        psd = self.cov_func.psd(omega)
        return at.dot(phi * psd, at.transpose(phi))

Review comment (Member): `_slice` takes two positional arguments.

    def prior(self, name, X, dims=None):
        R"""
        Returns the (approximate) GP prior distribution evaluated over the input locations
        `X`.

        Parameters
        ----------
        name: string
            Name of the random variable.
        X: array-like
            Function input values.
        dims: None
            Dimension name for the GP random variable.
        """
        X, _ = self.cov_func._slice(X)
        self._set_boundary(X)
        omega, phi, m_star = self._eigendecomposition(X, self.L, self.m, self.D)
        psd = self.cov_func.psd(omega)
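        # One standard normal coefficient per basis vector (m_star in total); the spectral
        # density scales each basis function's contribution to f.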
        self.beta = pm.Normal(f"{name}_coeffs_", size=m_star)
        self.f = pm.Deterministic(
            name, self.mean_func(X) + at.squeeze(at.dot(phi, self.beta * psd)), dims
        )
        return self.f

Review comment (Member): Is it possible to pass `dims` for the variable?

Reply (Contributor, author): yes! thanks for catching this

    def _build_conditional(self, name, Xnew):
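        # Reuses the coefficients `beta` and boundary `L` set in `prior`.  New inputs should
        # lie within [-L, L]; outside it the sinusoidal basis extrapolates poorly.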
        Xnew, _ = self.cov_func._slice(Xnew)
        omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.D)
        psd = self.cov_func.psd(omega)
        return self.mean_func(Xnew) + at.squeeze(at.dot(phi, self.beta * psd))

    def conditional(self, name, Xnew, dims=None):
        R"""
        Returns the (approximate) conditional distribution evaluated over new input locations
        `Xnew`.

        Parameters
        ----------
        name: string
            Name of the random variable.
        Xnew: array-like
            Function input values.
        dims: None
            Dimension name for the GP random variable.
        """
        fnew = self._build_conditional(name, Xnew)
        return pm.Deterministic(name, fnew, dims)
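
As a quick check of the approximation quality, a sketch like the following compares `approx_K`
against the exact covariance.  This is intended usage rather than code guaranteed to run
against this revision: it assumes the covariance function exposes the `D` attribute and `psd`
method this PR relies on (the review comments above question exactly that), and the variable
names here are hypothetical.

    import numpy as np
    import pymc as pm
    import pymc_experimental as pmx

    # One-dimensional inputs on a grid.
    X = np.linspace(-5.0, 5.0, 100)[:, None]
    cov_func = pm.gp.cov.ExpQuad(1, ls=1.0)

    with pm.Model():
        gp = pmx.gp.HSGP(m=[30], c=1.5, cov_func=cov_func)
        f = gp.prior("f", X=X)  # also sets gp.L from X and c

        # Approximate vs. exact covariance; the difference should be small away
        # from the boundaries of [-L, L].
        K_approx = gp.approx_K(X, gp.L, gp.m).eval()
        K_exact = cov_func(X).eval()
        print(np.abs(K_approx - K_exact).max())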
102 changes: 0 additions & 102 deletions pymc_experimental/gp/latent_approx.py
@@ -60,108 +60,6 @@ def conditional(self, name, Xnew, jitter=1e-6, **kwargs):
        return pm.MvNormal(name, mu=mu, chol=chol)


class HSGP(pm.gp.Latent):
    ## inputs: M, c

    def __init__(
        self, n_basis, c=3 / 2, *, mean_func=pm.gp.mean.Zero(), cov_func=pm.gp.cov.Constant(0.0)
    ):
        ## TODO: specify either c or L
        self.M = n_basis
        self.c = c
        super().__init__(mean_func=mean_func, cov_func=cov_func)

    def _validate_cov_func(self, cov_func):
        ## TODO: actually validate it.  Right now this fails unless cov func is exactly
        # in the form eta**2 * pm.gp.cov.Matern12(...) and will error otherwise.
        cov, scaling_factor = cov_func.factor_list
        return scaling_factor, cov.ls, cov.spectral_density

    def prior(self, name, X, **kwargs):
        f, Phi, L, spd, beta, Xmu, Xsd = self._build_prior(name, X, **kwargs)
        self.X, self.f = X, f
        self.Phi, self.L, self.spd, self.beta = Phi, L, spd, beta
        self.Xmu, self.Xsd = Xmu, Xsd
        return f

    def _generate_basis(self, X, L):
        indices = at.arange(1, self.M + 1)
        m1 = (np.pi / (2.0 * L)) * at.tile(L + X, self.M)
        m2 = at.diag(indices)
        Phi = at.sin(m1 @ m2) / at.sqrt(L)
        omega = (np.pi * indices) / (2.0 * L)
        return Phi, omega

    def _build_prior(self, name, X, **kwargs):
        n_obs = np.shape(X)[0]

        # standardize input scale
        X = at.as_tensor_variable(X)
        Xmu = at.mean(X, axis=0)
        Xsd = at.std(X, axis=0)
        Xz = (X - Xmu) / Xsd

        # define L using Xz and c
        La = at.abs(at.min(Xz))  # .eval()?
        Lb = at.max(Xz)
        L = self.c * at.max([La, Lb])

        # make basis and omega, spectral density
        Phi, omega = self._generate_basis(Xz, L)
        scale, ls, spectral_density = self._validate_cov_func(self.cov_func)
        spd = scale * spectral_density(omega, ls / Xsd).flatten()

        beta = pm.Normal(f"{name}_coeffs_", size=self.M)
        f = pm.Deterministic(name, self.mean_func(X) + at.dot(Phi * at.sqrt(spd), beta))
        return f, Phi, L, spd, beta, Xmu, Xsd

    def _build_conditional(self, Xnew, Xmu, Xsd, L, beta):
        Xnewz = (Xnew - Xmu) / Xsd
        Phi, omega = self._generate_basis(Xnewz, L)
        scale, ls, spectral_density = self._validate_cov_func(self.cov_func)
        spd = scale * spectral_density(omega, ls / Xsd).flatten()
        return self.mean_func(Xnew) + at.dot(Phi * at.sqrt(spd), beta)

    def conditional(self, name, Xnew):
        # warn about extrapolation
        fnew = self._build_conditional(Xnew, self.Xmu, self.Xsd, self.L, self.beta)
        return pm.Deterministic(name, fnew)


class ExpQuad(pm.gp.cov.ExpQuad):
    @staticmethod
    def spectral_density(omega, ls):
        # univariate spectral density; TODO: implement the multivariate case
        return at.sqrt(2 * np.pi) * ls * at.exp(-0.5 * ls**2 * omega**2)


class Matern52(pm.gp.cov.Matern52):
    @staticmethod
    def spectral_density(omega, ls):
        # univariate spectral density; TODO: implement the multivariate case
        # https://arxiv.org/pdf/1611.06740.pdf
        lam = at.sqrt(5) * (1.0 / ls)
        return (16.0 / 3.0) * lam**5 * (1.0 / (lam**2 + omega**2) ** 3)


class Matern32(pm.gp.cov.Matern32):
    @staticmethod
    def spectral_density(omega, ls):
        # univariate spectral density; TODO: implement the multivariate case
        # https://arxiv.org/pdf/1611.06740.pdf
        lam = np.sqrt(3.0) * (1.0 / ls)
        return 4.0 * lam**3 * (1.0 / at.square(lam**2 + omega**2))


class Matern12(pm.gp.cov.Matern12):
    @staticmethod
    def spectral_density(omega, ls):
        # univariate spectral density; TODO: implement the multivariate case
        # https://arxiv.org/pdf/1611.06740.pdf
        lam = 1.0 / ls
        return 2.0 * lam * (1.0 / (lam**2 + omega**2))


class KarhunenLoeveExpansion(pm.gp.Latent):
    def __init__(
        self,