Add type hints to utils and bart

juanitorduz · juanitorduz · commit 846b0b8fdc08 · 2023-03-28T21:35:05.000+02:00
diff --git a/pymc_bart/bart.py b/pymc_bart/bart.py
@@ -17,25 +17,30 @@
 from multiprocessing import Manager
 
 import numpy as np
+import numpy.typing as npt
 import pytensor.tensor as pt
 from pandas import DataFrame, Series
 from pymc.distributions.distribution import Distribution, _moment
 from pymc.logprob.abstract import _logprob
 from pytensor.tensor.random.op import RandomVariable
+from typing import List, Optional, Tuple
+
+
+from .utils import _sample_posterior, TensorLike
+
 
-from .utils import _sample_posterior
 
 __all__ = ["BART"]
 
 
 class BARTRV(RandomVariable):
     """Base class for BART."""
 
-    name = "BART"
+    name: str = "BART"
     ndim_supp = 1
-    ndims_params = [2, 1, 0, 0, 1]
-    dtype = "floatX"
-    _print_name = ("BART", "\\operatorname{BART}")
+    ndims_params: List[int] = [2, 1, 0, 0, 1]
+    dtype: str = "floatX"
+    _print_name: Tuple[str, str] = ("BART", "\\operatorname{BART}")
     all_trees = None
 
     def _supp_shape_from_params(self, dist_params, rep_param_idx=1, param_shapes=None):
@@ -63,29 +68,29 @@ class BART(Distribution):
 
     Parameters
     ----------
-    X : array-like
+    X : TensorLike
         The covariate matrix.
-    Y : array-like
+    Y : TensorLike
         The response vector.
     m : int
         Number of trees
     alpha : float
         Control the prior probability over the depth of the trees. Even when it can takes values in
         the interval (0, 1), it is recommended to be in the interval (0, 0.5].
-    split_prior : array-like
+    split_prior : Optional[List[float]], default None.
         Each element of split_prior should be in the [0, 1] interval and the elements should sum to
         1. Otherwise they will be normalized.
         Defaults to 0, i.e. all covariates have the same prior probability to be selected.
     """
 
     def __new__(
         cls,
-        name,
-        X,
-        Y,
-        m=50,
-        alpha=0.25,
-        split_prior=None,
+        name: str,
+        X: TensorLike,
+        Y: TensorLike,
+        m: int = 50,
+        alpha: float = 0.25,
+        split_prior: Optional[List[float]] = None,
         **kwargs,
     ):
         manager = Manager()
@@ -146,7 +151,9 @@ def get_moment(cls, rv, size, *rv_inputs):
         return mean
 
 
-def preprocess_xy(X, Y):
+def preprocess_xy(
+    X: TensorLike, Y: TensorLike
+) -> Tuple[npt.NDArray[np.float_], npt.NDArray[np.float_]]:
     if isinstance(Y, (Series, DataFrame)):
         Y = Y.to_numpy()
     if isinstance(X, (Series, DataFrame)):
diff --git a/pymc_bart/utils.py b/pymc_bart/utils.py
@@ -2,16 +2,29 @@
 
 import warnings
 
+import pytensor.tensor as pt
 import arviz as az
 import matplotlib.pyplot as plt
 import numpy as np
+import numpy.typing as npt
 from pytensor.tensor.var import Variable
 from scipy.interpolate import griddata
 from scipy.signal import savgol_filter
 from scipy.stats import norm, pearsonr
+from typing import List, Optional, Tuple, Union
+from .tree import Tree
 
 
-def _sample_posterior(all_trees, X, rng, size=None, excluded=None):
+TensorLike = Union[npt.NDArray[np.float_], pt.TensorVariable]
+
+
+def _sample_posterior(
+    all_trees: List[List[Tree]],
+    X: TensorLike,
+    rng: np.random.Generator,
+    size: Optional[Union[int, Tuple[int, ...]]] = None,
+    excluded: Optional[List[int]] = None,
+) -> npt.NDArray[np.float_]:
     """
     Generate samples from the BART-posterior.