Add Hilbert Space Student T Process #6997

Closed
17 changes: 16 additions & 1 deletion pymc/gp/__init__.py
@@ -22,4 +22,19 @@
MarginalKron,
MarginalSparse,
)
from pymc.gp.hsgp_approx import HSGP
from pymc.gp.hsgp_approx import HSGP, HSTP

__all__ = [
"cov",
"mean",
"util",
"TP",
"Latent",
"LatentKron",
"Marginal",
"MarginalApprox",
"MarginalKron",
"MarginalSparse",
"HSGP",
"HSTP",
]
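For context, a minimal usage sketch of the newly exported `HSTP`, mirroring the docstring example added later in this diff (the shapes and hyperparameters here are illustrative only):

import numpy as np
import pymc as pm

# One-dimensional inputs on [0, 1].
X = np.random.rand(100, 1)

with pm.Model():
    scale_func = pm.gp.cov.ExpQuad(1, ls=0.1)
    # `nu` (degrees of freedom) is required for the Student-T process;
    # the remaining arguments match HSGP.
    tp = pm.gp.HSTP(m=[25], c=4.0, scale_func=scale_func, nu=5.0)
    f = tp.prior("f", X=X)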
298 changes: 214 additions & 84 deletions pymc/gp/hsgp_approx.py
@@ -15,6 +15,7 @@
import numbers
import warnings

from abc import ABC, abstractmethod
from types import ModuleType
from typing import Optional, Sequence, Union

@@ -29,6 +30,8 @@

TensorLike = Union[np.ndarray, pt.TensorVariable]

__all__ = ["HSGP", "HSTP"]


def set_boundary(Xs: TensorLike, c: Union[numbers.Real, TensorLike]) -> TensorLike:
"""Set the boundary using the mean-subtracted `Xs` and `c`. `c` is usually a scalar
@@ -66,87 +69,8 @@ def calc_eigenvectors(
return phi
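The body of `set_boundary` is collapsed in this diff. A minimal sketch of what its docstring describes, assuming the implementation matches the `S = max|X|`, `L = c * S` relationship documented below:

import pytensor.tensor as pt

def boundary_sketch(Xs, c):
    # Xs is assumed mean-subtracted. S is the half-width of the data
    # in each active dimension.
    S = pt.max(pt.abs(Xs), axis=0)
    # Extend the half-width by the proportion factor c to get the boundary L.
    return c * S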


class HSGP(Base):
R"""
Hilbert Space Gaussian process approximation.

The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process. It is a
reduced rank GP approximation that uses a fixed set of basis vectors whose coefficients are
random functions of a stationary covariance function's power spectral density. Its usage
is largely similar to `gp.Latent`. Like `gp.Latent`, it does not assume a Gaussian noise model
and can be used with any likelihood, or as a component anywhere within a model. Also like
`gp.Latent`, it has `prior` and `conditional` methods. It supports any sum of covariance
functions that implement a `power_spectral_density` method.

For information on choosing appropriate `m`, `L`, and `c`, refer to Ruitort-Mayol et al. or to
the PyMC examples that use HSGP.

To work with the HSGP in its "linearized" form, as a matrix of basis vectors and a vector of
coefficients, see the method `prior_linearized`.

Parameters
----------
m: list
The number of basis vectors to use for each active dimension (covariance parameter
`active_dim`).
L: list
The boundary of the space for each `active_dim`. It is called the boundary condition.
Choose L such that the domain `[-L, L]` contains all points in the column of X given by the
`active_dim`.
c: float
The proportion extension factor. Used to construct L from X. Defined as `S = max|X|` such
that `X` is in `[-S, S]`. `L` is calculated as `c * S`. One of `c` or `L` must be
provided. Further information can be found in Ruitort-Mayol et al.
drop_first: bool
Default `False`. Sometimes the first basis vector is quite "flat" and very similar to
the intercept term. When there is an intercept in the model, ignoring the first basis
vector may improve sampling.
cov_func: None, 2D array, or instance of Covariance
The covariance function.
mean_func: None, instance of Mean
The mean function. Defaults to zero.

Examples
--------
.. code:: python

# A three dimensional column vector of inputs.
X = np.random.rand(100, 3)

with pm.Model() as model:
# Specify the covariance function.
# Three input dimensions, but we only want to use the last two.
cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2])

# Specify the HSGP.
# Use 25 basis vectors across each active dimension for a total of 25 * 25 = 625.
# The value `c = 4` means the boundary of the approximation
# lies at four times the half width of the data.
# In this example the data lie between zero and one,
# so the boundaries occur at -1.5 and 2.5. The data, both for
training and prediction, should reside well within that boundary.
gp = pm.gp.HSGP(m=[25, 25], c=4.0, cov_func=cov_func)

# Place a GP prior over the function f.
f = gp.prior("f", X=X)

...

# After fitting or sampling, specify the distribution
# at new points with .conditional
Xnew = np.linspace(-1, 2, 50)[:, None]

with model:
fcond = gp.conditional("fcond", Xnew=Xnew)

References
----------
- Ruitort-Mayol, G., Anderson, M., Solin, A., Vehtari, A. (2022). Practical
Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming

- Solin, A., Sarkka, S. (2019) Hilbert Space Methods for Reduced-Rank Gaussian Process
Regression.
"""
class HSSP(ABC, Base):
"""Hilbert Space Base."""

def __init__(
self,
@@ -158,7 +82,7 @@ def __init__(
*,
mean_func: Mean = Zero(),
cov_func: Covariance,
):
) -> None:
arg_err_msg = (
"`m` and L, if provided, must be sequences with one element per active "
"dimension of the kernel or covariance function."
@@ -302,6 +226,10 @@ def prior_linearized(self, Xs: TensorLike):
i = int(self._drop_first == True)
return phi[:, i:], pt.sqrt(psd[i:])

@abstractmethod
def _base_dist(self, name: str, **kwargs) -> pt.TensorVariable:
"""Return the prior over the basis coefficients; each subclass chooses the distribution."""
raise NotImplementedError

def prior(self, name: str, X: TensorLike, dims: Optional[str] = None): # type: ignore
R"""
Returns the (approximate) GP prior distribution evaluated over the input locations `X`.
@@ -320,14 +248,14 @@ def prior(self, name: str, X: TensorLike, dims: Optional[str] = None):  # type:
phi, sqrt_psd = self.prior_linearized(X - self._X_mean)

if self._parameterization == "noncentered":
self._beta = pm.Normal(
self._beta = self._base_dist(
f"{name}_hsgp_coeffs_", size=self._m_star - int(self._drop_first)
)
self._sqrt_psd = sqrt_psd
f = self.mean_func(X) + phi @ (self._beta * self._sqrt_psd)

elif self._parameterization == "centered":
self._beta = pm.Normal(f"{name}_hsgp_coeffs_", sigma=sqrt_psd)
self._beta = self._base_dist(f"{name}_hsgp_coeffs_", sigma=sqrt_psd)
f = self.mean_func(X) + phi @ self._beta

self.f = pm.Deterministic(name, f, dims=dims)
@@ -372,3 +300,205 @@ def conditional(self, name: str, Xnew: TensorLike, dims: Optional[str] = None):
"""
fnew = self._build_conditional(Xnew)
return pm.Deterministic(name, fnew, dims=dims)


class HSGP(HSSP):
R"""
Hilbert Space Gaussian process approximation.

The `gp.HSGP` class is an implementation of the Hilbert Space Gaussian process. It is a
reduced rank GP approximation that uses a fixed set of basis vectors whose coefficients are
random functions of a stationary covariance function's power spectral density. Its usage
is largely similar to `gp.Latent`. Like `gp.Latent`, it does not assume a Gaussian noise model
and can be used with any likelihood, or as a component anywhere within a model. Also like
`gp.Latent`, it has `prior` and `conditional` methods. It supports any sum of covariance
functions that implement a `power_spectral_density` method.

For information on choosing appropriate `m`, `L`, and `c`, refer to Ruitort-Mayol et al. or to
the PyMC examples that use HSGP.

To work with the HSGP in its "linearized" form, as a matrix of basis vectors and a vector of
coefficients, see the method `prior_linearized`.

Parameters
----------
m: list
The number of basis vectors to use for each active dimension (covariance parameter
`active_dim`).
L: list
The boundary of the space for each `active_dim`. It is called the boundary condition.
Choose L such that the domain `[-L, L]` contains all points in the column of X given by the
`active_dim`.
c: float
The proportion extension factor. Used to construct L from X. Defined as `S = max|X|` such
that `X` is in `[-S, S]`. `L` is calculated as `c * S`. One of `c` or `L` must be
provided. Further information can be found in Ruitort-Mayol et al.
drop_first: bool
Default `False`. Sometimes the first basis vector is quite "flat" and very similar to
the intercept term. When there is an intercept in the model, ignoring the first basis
vector may improve sampling.
cov_func: None, 2D array, or instance of Covariance
The covariance function.
mean_func: None, instance of Mean
The mean function. Defaults to zero.

Examples
--------
.. code:: python

# A three dimensional column vector of inputs.
X = np.random.rand(100, 3)

with pm.Model() as model:
# Specify the covariance function.
# Three input dimensions, but we only want to use the last two.
cov_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2])

# Specify the HSGP.
# Use 25 basis vectors across each active dimension for a total of 25 * 25 = 625.
# The value `c = 4` means the boundary of the approximation
# lies at four times the half width of the data.
# In this example the data lie between zero and one,
# so the boundaries occur at -1.5 and 2.5. The data, both for
training and prediction, should reside well within that boundary.
gp = pm.gp.HSGP(m=[25, 25], c=4.0, cov_func=cov_func)

# Place a GP prior over the function f.
f = gp.prior("f", X=X)

...

# After fitting or sampling, specify the distribution
# at new points with .conditional
Xnew = np.linspace(-1, 2, 50)[:, None]

with model:
fcond = gp.conditional("fcond", Xnew=Xnew)

References
----------
- Ruitort-Mayol, G., Anderson, M., Solin, A., Vehtari, A. (2022). Practical
Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming

- Solin, A., Sarkka, S. (2019) Hilbert Space Methods for Reduced-Rank Gaussian Process
Regression.
"""

def _base_dist(self, name: str, **kwargs) -> pt.TensorVariable:
return pm.Normal(name, **kwargs)


class HSTP(HSSP):
R"""
Hilbert Space Student-T process approximation.

The `gp.HSTP` class is an implementation of the Hilbert Space Student T process. It is a
reduced rank TP approximation that uses a fixed set of basis vectors whose coefficients are
random functions of a stationary covariance function's power spectral density. Its usage
is largely similar to `gp.Latent`. Like `gp.Latent`, it does not assume a Gaussian noise model
and can be used with any likelihood, or as a component anywhere within a model. Also like
`gp.Latent`, it has `prior` and `conditional` methods. It supports any sum of covariance
functions that implement a `power_spectral_density` method.

For information on choosing appropriate `m`, `L`, and `c`, refer to Ruitort-Mayol et al. or to
the PyMC examples that use HSGP.

To work with the HSTP in its "linearized" form, as a matrix of basis vectors and a vector of
coefficients, see the method `prior_linearized`.

Parameters
----------
m: list
The number of basis vectors to use for each active dimension (covariance parameter
`active_dim`).
L: list
The boundary of the space for each `active_dim`. It is called the boundary condition.
Choose L such that the domain `[-L, L]` contains all points in the column of X given by the
`active_dim`.
c: float
The proportion extension factor. Used to construct L from X. Defined as `S = max|X|` such
that `X` is in `[-S, S]`. `L` is calculated as `c * S`. One of `c` or `L` must be
provided. Further information can be found in Ruitort-Mayol et al.
drop_first: bool
Default `False`. Sometimes the first basis vector is quite "flat" and very similar to
the intercept term. When there is an intercept in the model, ignoring the first basis
vector may improve sampling.
scale_func: None, 2D array, or instance of Covariance
The covariance function, used as the scale function of the Student-T process.
mean_func: None, instance of Mean
The mean function. Defaults to zero.
nu: TensorLike
The degrees of freedom of the Student-T process.

Examples
--------
.. code:: python

# A three dimensional column vector of inputs.
X = np.random.rand(100, 3)

with pm.Model() as model:
# Specify the covariance (scale) function.
# Three input dimensions, but we only want to use the last two.
scale_func = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 2])

# Specify the HSTP.
# Use 25 basis vectors across each active dimension for a total of 25 * 25 = 625.
# The value `c = 4` means the boundary of the approximation
# lies at four times the half width of the data.
# In this example the data lie between zero and one,
# so the boundaries occur at -1.5 and 2.5. The data, both for
# training and prediction, should reside well within that boundary.
#
# Unlike the HSGP, `nu` must also be provided.
tp = pm.gp.HSTP(m=[25, 25], c=4.0, scale_func=scale_func, nu=5.0)

# Place a TP prior over the function f.
f = tp.prior("f", X=X)

...

# After fitting or sampling, specify the distribution
# at new points with .conditional
Xnew = np.linspace(-1, 2, 50)[:, None]

with model:
fcond = tp.conditional("fcond", Xnew=Xnew)

References
----------
- Ruitort-Mayol, G., Anderson, M., Solin, A., Vehtari, A. (2022). Practical
Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming

- Solin, A., Sarkka, S. (2019) Hilbert Space Methods for Reduced-Rank Gaussian Process
Regression.

- Sellier, J., Dellaportas, P. (2023). Bayesian Online Change Point Detection
with Hilbert Space Approximate Student-t Process.
"""

def __init__(
self,
m: Sequence[int],
L: Optional[Sequence[float]] = None,
c: Optional[numbers.Real] = None,
drop_first: bool = False,
parameterization="noncentered",
*,
mean_func: Mean = Zero(),
scale_func: Covariance,
nu: TensorLike,
) -> None:
super().__init__(
m,
L,
c,
drop_first,
parameterization,
mean_func=mean_func,
cov_func=scale_func,
)
self.nu = pt.as_tensor_variable(nu)
@ricardoV94 (Member) commented on Nov 7, 2023:

    Referencing variables in the scope of the class is what's messing up with #6883

    Is there another way?

    PS: I know this is just the tip of the iceberg but would be great to figure it out sooner rather than later.

Reply from a contributor:

    > Is there another way?

    Yeah, probably for the GP classes it's possible, but I think for the covariance functions it will be a lot harder.
def _base_dist(self, name: str, **kwargs) -> pt.TensorVariable:
return pm.StudentT(name, nu=self.nu, **kwargs)
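The net effect of the hook: under the noncentered parameterization, `HSTP` draws its basis coefficients from a StudentT where `HSGP` uses a Normal, and everything else is shared. A self-contained sketch with stand-in values for the basis and the spectral weights (all names and shapes hypothetical):

import numpy as np
import pymc as pm

n, m_star = 100, 625
phi = np.random.rand(n, m_star)  # stand-in for the eigenvector basis
sqrt_psd = np.ones(m_star)       # stand-in for the square-root PSD weights

with pm.Model():
    # HSGP would use: beta = pm.Normal("beta", size=m_star)
    beta = pm.StudentT("beta", nu=5.0, size=m_star)
    f = pm.Deterministic("f", phi @ (beta * sqrt_psd))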