Finish HSGP #61

Closed · wants to merge 12 commits
7 changes: 2 additions & 5 deletions pymc_experimental/gp/__init__.py
@@ -1,5 +1,2 @@
- from pymc_experimental.gp.latent_approx import (
-     HSGP,
-     KarhunenLoeveExpansion,
-     ProjectedProcess,
- )
+ from pymc_experimental.gp.hsgp import HSGP
+ from pymc_experimental.gp.latent_approx import KarhunenLoeveExpansion, ProjectedProcess
195 changes: 195 additions & 0 deletions pymc_experimental/gp/hsgp.py
@@ -0,0 +1,195 @@
import warnings

import aesara.tensor as at
import numpy as np
import pymc as pm


class HSGP(pm.gp.gp.Base):
R"""
Hilbert Space Gaussian process

The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process. This
approximation is a linear model that uses a fixed set of basis vectors, whose coeficients are
random functions of a stationary covariance function's power spectral density. Like
`gp.Latent`, it does not assume a Gaussian noise model and can be used with any likelihood or as
a component anywhere within a model. Also like `gp.Latent`, it has `prior` and `conditional`
methods. It additonally has an `approx_K` method which returns the approximate covariance
matrix. It supports a limited subset of additive covariances.
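
    Following Solin and Sarkka (2019), the prior this class constructs is, as a sketch of the
    underlying linear model,

    .. math::

        f(x) \approx \sum_{j} \sqrt{S\left(\sqrt{\lambda_j}\right)} \, \phi_j(x) \, \beta_j \,,
        \quad \beta_j \sim \text{N}(0, 1) \,,

    where :math:`S` is the power spectral density of the covariance function, and
    :math:`\lambda_j` and :math:`\phi_j` are the eigenvalues and eigenfunctions of the
    Laplace operator on the domain :math:`[-L, L]`.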

    For information on choosing appropriate `m`, `L`, and `c`, refer to Ruitort-Mayol et al.
    or to the PyMC examples documentation.

    Parameters
    ----------
    m: list
        The number of basis vectors to use for each active dimension (covariance parameter
        `active_dims`).
    L: list
        The boundary of the space for each active dimension.  It is called the boundary
        condition.  Choose `L` such that the domain `[-L, L]` contains all points in the
        column of X given by the active dimension.
    c: float
        The proportion extension factor, 1.5 by default.  Used to construct `L` from X.
        Defined as `S = max|X|` such that `X` is in `[-S, S]`.  `L` is then calculated as
        `c * S`.  One of `c` or `L` must be provided.  Further information can be found in
        Ruitort-Mayol et al.
    cov_func: None, 2D array, or instance of Covariance
        The covariance function.  Defaults to zero.
    mean_func: None, instance of Mean
        The mean function.  Defaults to zero.

    Examples
    --------
    .. code:: python

        # A matrix of 100 three-dimensional input points.
        X = np.random.randn(100, 3)

        with pm.Model() as model:
            # Specify the covariance function.  Three input dimensions, but we only want
            # to use the last two.
            cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2])

            # Specify the HSGP.  Use 10 basis vectors across each active dimension, [1, 2],
            # for a total of 10 * 10 = 100.  The input X is normally distributed, so use a
            # boundary condition that should easily contain all the points, from -6 to 6
            # in each dimension.
            gp = pmx.gp.HSGP(m=[10, 10], L=[6, 6], cov_func=cov_func)

            # Place a GP prior over the function f.
            f = gp.prior("f", X=X)

        ...

        # After fitting or sampling, specify the distribution at new points with
        # .conditional.
        Xnew = np.linspace(-1, 2, 50)[:, None]

        with model:
            fcond = gp.conditional("fcond", Xnew=Xnew)

    References
    ----------
    -   Ruitort-Mayol, G., Anderson, M., Solin, A., and Vehtari, A. (2022).  Practical
        Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming.

    -   Solin, A., and Sarkka, S. (2019).  Hilbert Space Methods for Reduced-Rank Gaussian
        Process Regression.
    """

    def __init__(
        self,
        m,
        L=None,
        c=1.5,
        *,
        mean_func=pm.gp.mean.Zero(),
        cov_func=pm.gp.cov.Constant(0.0),
    ):
        arg_err_msg = (
            "`m` and `L`, if provided, must be lists or tuples, with one element per active "
            "dimension."
        )
        try:
            if len(m) != cov_func.D:
                raise ValueError(arg_err_msg)
        except TypeError as e:
            raise ValueError(arg_err_msg) from e

Review comment (Member): Do covariance functions have a `D` attribute?

Review comment (Member): Is this supposed to be `len(active_dims)`?

        if L is not None and len(L) != cov_func.D:
            raise ValueError(arg_err_msg)

        if L is None and c < 1.2:
            warnings.warn(
                "Most applications will require a `c >= 1.2` for accuracy at the boundaries "
                "of the domain."
            )

        self.m = m
        self.L = L
        self.c = c
        self.D = cov_func.D

        super().__init__(mean_func=mean_func, cov_func=cov_func)

    def __add__(self, other):
        raise NotImplementedError("Additive HSGPs aren't supported.")

    def _set_boundary(self, X):
        """Make `L` from `X` and `c` if `L` is not passed in."""
        if self.L is None:
            # Define new L based on c and X range
            La = at.abs(at.min(X, axis=0))
            Lb = at.abs(at.max(X, axis=0))
            self.L = self.c * at.max(at.stack((La, Lb)), axis=0)
        else:
            self.L = at.as_tensor_variable(self.L)

    @staticmethod
    def _eigendecomposition(X, L, m, D):
        """Construct the eigenvalues and eigenfunctions of the Laplace operator."""
        m_star = at.prod(m)
        S = np.meshgrid(*[np.arange(1, 1 + m[d]) for d in range(D)])
        S = np.vstack([s.flatten() for s in S]).T
        eigvals = at.square((np.pi * S) / (2 * L))
        phi = at.ones((X.shape[0], m_star))
        for d in range(D):
            c = 1.0 / np.sqrt(L[d])
            phi *= c * at.sin(at.sqrt(eigvals[:, d]) * (at.tile(X[:, d][:, None], m_star) + L[d]))
        omega = at.sqrt(eigvals)
        return omega, phi, m_star

    def approx_K(self, X, L, m):
        """A helper function which returns the approximate covariance matrix
        `K = phi @ diag(psd) @ phi.T`.  This can be helpful for checking how closely the
        approximation matches the exact covariance.
        """
        X, _ = self.cov_func._slice(X)
        omega, phi, _ = self._eigendecomposition(X, L, m, self.cov_func.D)
        psd = self.cov_func.psd(omega)
        return at.dot(phi * psd, at.transpose(phi))

Review comment (Member): `_slice` takes two positional arguments.

    def prior(self, name, X, dims=None):
        R"""
        Returns the (approximate) GP prior distribution evaluated over the input locations
        `X`.

        Parameters
        ----------
        name: string
            Name of the random variable.
        X: array-like
            Function input values.
        dims: None
            Dimension name for the GP random variable.
        """
        X, _ = self.cov_func._slice(X)
        self._set_boundary(X)
        omega, phi, m_star = self._eigendecomposition(X, self.L, self.m, self.D)
        psd = self.cov_func.psd(omega)
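        # One standard normal coefficient per basis vector (m_star in total); the spectral
        # density scales each basis function's contribution to f.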
        self.beta = pm.Normal(f"{name}_coeffs_", size=m_star)
        self.f = pm.Deterministic(
            name, self.mean_func(X) + at.squeeze(at.dot(phi, self.beta * psd)), dims
        )
        return self.f

Review comment (Member): Is it possible to pass `dims` for the variable?

Reply (Contributor, author): yes! thanks for catching this

    def _build_conditional(self, name, Xnew):
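        # Reuses the coefficients `beta` and boundary `L` set in `prior`.  New inputs should
        # lie within [-L, L]; outside it the sinusoidal basis extrapolates poorly.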
        Xnew, _ = self.cov_func._slice(Xnew)
        omega, phi, _ = self._eigendecomposition(Xnew, self.L, self.m, self.D)
        psd = self.cov_func.psd(omega)
        return self.mean_func(Xnew) + at.squeeze(at.dot(phi, self.beta * psd))

    def conditional(self, name, Xnew, dims=None):
        R"""
        Returns the (approximate) conditional distribution evaluated over new input locations
        `Xnew`.

        Parameters
        ----------
        name: string
            Name of the random variable.
        Xnew: array-like
            Function input values.
        dims: None
            Dimension name for the GP random variable.
        """
        fnew = self._build_conditional(name, Xnew)
        return pm.Deterministic(name, fnew, dims)
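
As a quick check of the approximation quality, a sketch like the following compares `approx_K`
against the exact covariance.  This is intended usage rather than code guaranteed to run
against this revision: it assumes the covariance function exposes the `D` attribute and `psd`
method this PR relies on (the review comments above question exactly that), and the variable
names here are hypothetical.

    import numpy as np
    import pymc as pm
    import pymc_experimental as pmx

    # One-dimensional inputs on a grid.
    X = np.linspace(-5.0, 5.0, 100)[:, None]
    cov_func = pm.gp.cov.ExpQuad(1, ls=1.0)

    with pm.Model():
        gp = pmx.gp.HSGP(m=[30], c=1.5, cov_func=cov_func)
        f = gp.prior("f", X=X)  # also sets gp.L from X and c

        # Approximate vs. exact covariance; the difference should be small away
        # from the boundaries of [-L, L].
        K_approx = gp.approx_K(X, gp.L, gp.m).eval()
        K_exact = cov_func(X).eval()
        print(np.abs(K_approx - K_exact).max())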
102 changes: 0 additions & 102 deletions pymc_experimental/gp/latent_approx.py
@@ -60,108 +60,6 @@ def conditional(self, name, Xnew, jitter=1e-6, **kwargs):
        return pm.MvNormal(name, mu=mu, chol=chol)


class HSGP(pm.gp.Latent):
    ## inputs: M, c

    def __init__(
        self, n_basis, c=3 / 2, *, mean_func=pm.gp.mean.Zero(), cov_func=pm.gp.cov.Constant(0.0)
    ):
        ## TODO: specify either c or L
        self.M = n_basis
        self.c = c
        super().__init__(mean_func=mean_func, cov_func=cov_func)

    def _validate_cov_func(self, cov_func):
        ## TODO: actually validate it.  Right now this fails unless cov func is exactly
        # in the form eta**2 * pm.gp.cov.Matern12(...) and will error otherwise.
        cov, scaling_factor = cov_func.factor_list
        return scaling_factor, cov.ls, cov.spectral_density

    def prior(self, name, X, **kwargs):
        f, Phi, L, spd, beta, Xmu, Xsd = self._build_prior(name, X, **kwargs)
        self.X, self.f = X, f
        self.Phi, self.L, self.spd, self.beta = Phi, L, spd, beta
        self.Xmu, self.Xsd = Xmu, Xsd
        return f

    def _generate_basis(self, X, L):
        indices = at.arange(1, self.M + 1)
        m1 = (np.pi / (2.0 * L)) * at.tile(L + X, self.M)
        m2 = at.diag(indices)
        Phi = at.sin(m1 @ m2) / at.sqrt(L)
        omega = (np.pi * indices) / (2.0 * L)
        return Phi, omega

    def _build_prior(self, name, X, **kwargs):
        n_obs = np.shape(X)[0]

        # standardize input scale
        X = at.as_tensor_variable(X)
        Xmu = at.mean(X, axis=0)
        Xsd = at.std(X, axis=0)
        Xz = (X - Xmu) / Xsd

        # define L using Xz and c
        La = at.abs(at.min(Xz))  # .eval()?
        Lb = at.max(Xz)
        L = self.c * at.max([La, Lb])

        # make basis and omega, spectral density
        Phi, omega = self._generate_basis(Xz, L)
        scale, ls, spectral_density = self._validate_cov_func(self.cov_func)
        spd = scale * spectral_density(omega, ls / Xsd).flatten()

        beta = pm.Normal(f"{name}_coeffs_", size=self.M)
        f = pm.Deterministic(name, self.mean_func(X) + at.dot(Phi * at.sqrt(spd), beta))
        return f, Phi, L, spd, beta, Xmu, Xsd

    def _build_conditional(self, Xnew, Xmu, Xsd, L, beta):
        Xnewz = (Xnew - Xmu) / Xsd
        Phi, omega = self._generate_basis(Xnewz, L)
        scale, ls, spectral_density = self._validate_cov_func(self.cov_func)
        spd = scale * spectral_density(omega, ls / Xsd).flatten()
        return self.mean_func(Xnew) + at.dot(Phi * at.sqrt(spd), beta)

    def conditional(self, name, Xnew):
        # warn about extrapolation
        fnew = self._build_conditional(Xnew, self.Xmu, self.Xsd, self.L, self.beta)
        return pm.Deterministic(name, fnew)


class ExpQuad(pm.gp.cov.ExpQuad):
    @staticmethod
    def spectral_density(omega, ls):
        # univariate spectral density; TODO: implement the multivariate case
        return at.sqrt(2 * np.pi) * ls * at.exp(-0.5 * ls**2 * omega**2)


class Matern52(pm.gp.cov.Matern52):
    @staticmethod
    def spectral_density(omega, ls):
        # univariate spectral density; TODO: implement the multivariate case
        # https://arxiv.org/pdf/1611.06740.pdf
        lam = at.sqrt(5) * (1.0 / ls)
        return (16.0 / 3.0) * lam**5 * (1.0 / (lam**2 + omega**2) ** 3)


class Matern32(pm.gp.cov.Matern32):
    @staticmethod
    def spectral_density(omega, ls):
        # univariate spectral density; TODO: implement the multivariate case
        # https://arxiv.org/pdf/1611.06740.pdf
        lam = np.sqrt(3.0) * (1.0 / ls)
        return 4.0 * lam**3 * (1.0 / at.square(lam**2 + omega**2))


class Matern12(pm.gp.cov.Matern12):
    @staticmethod
    def spectral_density(omega, ls):
        # univariate spectral density; TODO: implement the multivariate case
        # https://arxiv.org/pdf/1611.06740.pdf
        lam = 1.0 / ls
        return 2.0 * lam * (1.0 / (lam**2 + omega**2))


class KarhunenLoeveExpansion(pm.gp.Latent):
    def __init__(
        self,