diff --git a/docs/_templates/autosummary/base.rst b/docs/_templates/autosummary/base.rst new file mode 100644 index 000000000..3fe9858d7 --- /dev/null +++ b/docs/_templates/autosummary/base.rst @@ -0,0 +1,5 @@ +{{ objname | escape | underline }} + +.. currentmodule:: {{ module }} + +.. auto{{ objtype }}:: {{ objname }} diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst new file mode 100644 index 000000000..574b853a5 --- /dev/null +++ b/docs/_templates/autosummary/class.rst @@ -0,0 +1,29 @@ +{{ objname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block methods %} + .. automethod:: __init__ + + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/docs/api_reference.rst b/docs/api_reference.rst index b47e912fa..2d85c3ab4 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -4,23 +4,50 @@ API Reference This reference provides detailed documentation for all modules, classes, and methods in the current release of PyMC experimental. +.. currentmodule:: pymc_experimental +.. autosummary:: + :toctree: generated/ -.. toctree:: - :maxdepth: 2 + marginal_model.MarginalModel + model_builder.ModelBuilder +Inference +========= -:mod:`pymc_experimental.distributions` -============================= +.. currentmodule:: pymc_experimental.inference +.. autosummary:: + :toctree: generated/ -.. automodule:: pymc_experimental.distributions.histogram_utils - :members: histogram_approximation + fit -:mod:`pymc_experimental.utils` -============================= +Distributions +============= -.. 
automodule:: pymc_experimental.utils.spline - :members: bspline_interpolation +.. currentmodule:: pymc_experimental.distributions +.. autosummary:: + :toctree: generated/ -.. automodule:: pymc_experimental.utils.prior - :members: prior_from_idata + GenExtreme + histogram_utils.histogram_approximation + + +Gaussian Processes +================== + +.. currentmodule:: pymc_experimental.gp +.. autosummary:: + :toctree: generated/ + + latent_approx.HSGP + + +Utils +===== + +.. currentmodule:: pymc_experimental.utils +.. autosummary:: + :toctree: generated/ + + spline.bspline_interpolation + prior.prior_from_idata diff --git a/docs/conf.py b/docs/conf.py index f45b3407c..e831458f2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,16 +56,18 @@ # ones. extensions = [ "sphinx.ext.autodoc", + "sphinx.ext.autosummary", "sphinx.ext.viewcode", "sphinx.ext.napoleon", "sphinx.ext.mathjax", "nbsphinx", + "matplotlib.sphinxext.plot_directive", ] nbsphinx_execute = "never" # Add any paths that contain templates here, relative to this directory. -# templates_path = ["_templates"] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: @@ -81,7 +83,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +# language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -97,7 +99,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "pydata_sphinx_theme" +html_theme = "pymc_sphinx_theme" # Theme options are theme-specific and customize the look and feel of a theme # further.
For a list of options available for each theme, see the @@ -110,14 +112,17 @@ "show_toc_level": 2, "navigation_depth": 4, "search_bar_text": "Search the docs...", - "icon_links": [ - { - "name": "GitHub", - "url": "https://github.com/pymc-devs/pymc-experimental", - "icon": "fab fa-github-square", - }, - ], + "use_search_override": False, + "logo": {"text": project}, } +html_context = { + "github_user": "pymc-devs", + "github_repo": "pymc-experimental", + "github_version": "main", + "doc_path": "docs", + "default_mode": "light", +} + # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs/index.rst b/docs/index.rst index b5dfef6fd..28cf4c9a3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -38,16 +38,7 @@ Contributors ============ See the `GitHub contributor page <https://github.com/pymc-devs/pymc-experimental/graphs/contributors>`_. -Contents -======== - .. toctree:: - :maxdepth: 4 + :hidden: api_reference - -Indices -======= - -* :ref:`genindex` -* :ref:`modindex` diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index e214f0081..2de72c769 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -60,7 +60,7 @@ def rng_fn( class GenExtreme(Continuous): r""" - Univariate Generalized Extreme Value log-likelihood + Univariate Generalized Extreme Value log-likelihood The cdf of this distribution is @@ -119,13 +119,13 @@ class GenExtreme(Continuous): Parameters ---------- - mu: float + mu : float Location parameter. - sigma: float + sigma : float Scale parameter (sigma > 0). - xi: float + xi : float Shape parameter - scipy: bool + scipy : bool Whether or not to use the Scipy interpretation of the shape parameter (defaults to `False`).
diff --git a/pymc_experimental/marginal_model.py b/pymc_experimental/marginal_model.py index aed93adb5..599509bfb 100644 --- a/pymc_experimental/marginal_model.py +++ b/pymc_experimental/marginal_model.py @@ -54,6 +54,7 @@ class MarginalModel(Model): Marginalize over a single variable .. code-block:: python + import pymc as pm from pymc_experimental import MarginalModel diff --git a/pymc_experimental/model_builder.py b/pymc_experimental/model_builder.py index 6d2513349..191b1b1e7 100644 --- a/pymc_experimental/model_builder.py +++ b/pymc_experimental/model_builder.py @@ -15,6 +15,7 @@ import hashlib import json +from abc import abstractmethod from pathlib import Path from typing import Dict, Union @@ -24,12 +25,10 @@ import pymc as pm -class ModelBuilder(pm.Model): +class ModelBuilder: """ ModelBuilder can be used to provide an easy-to-use API (similar to scikit-learn) for models and help with deployment. - - Extends the pymc.Model class. """ _model_type = "BaseClass" @@ -38,8 +37,8 @@ class ModelBuilder(pm.Model): def __init__( self, model_config: Dict, - sampler_config: Dict, - data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None, + data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]], + sampler_config: Dict = None, ): """ Initializes model configuration and sampler configuration for the model @@ -48,10 +47,10 @@ def __init__( ---------- model_config : Dictionary dictionary of parameters that initialise model configuration. Generated by the user defined create_sample_input method. - sampler_config : Dictionary - dictionary of parameters that initialise sampler configuration. Generated by the user defined create_sample_input method. data : Dictionary It is the data we need to train the model on. + sampler_config : Dictionary + dictionary of parameters that initialise sampler configuration. Generated by the user defined create_sample_input method. 
Examples -------- >>> class LinearModel(ModelBuilder): @@ -60,20 +59,23 @@ def __init__( """ super().__init__() + if sampler_config is None: + sampler_config = {} self.model_config = model_config # parameters for priors etc. - self.sample_config = sampler_config # parameters for sampling - self.idata = None # inference data object + self.sampler_config = sampler_config # parameters for sampling self.data = data - self.build() + self.idata = ( + None # inference data object placeholder, idata is generated during build execution + ) - def build(self): + def build(self) -> None: """ Builds the defined model. """ - with self: - self.build_model(self.model_config, self.data) + self.build_model(self, self.model_config, self.data) + @abstractmethod def _data_setter( self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]], x_only: bool = True ): @@ -100,8 +102,10 @@ def _data_setter( raise NotImplementedError - @classmethod - def create_sample_input(cls): + # need a discussion if it's really needed. + @staticmethod + @abstractmethod + def create_sample_input(): """ Needs to be implemented by the user in the inherited class. Returns examples for data, model_config, sampler_config. @@ -135,7 +139,7 @@ def create_sample_input(cls): raise NotImplementedError - def save(self, fname): + def save(self, fname: str) -> None: """ Saves inference data of the model. @@ -159,8 +163,9 @@ def save(self, fname): self.idata.to_netcdf(file) @classmethod - def load(cls, fname): + def load(cls, fname: str): """ + Creates a ModelBuilder instance from a file, Loads inference data for the model. Parameters @@ -170,7 +175,7 @@ def load(cls, fname): Returns ------- - Returns the inference data that is loaded from local system. + Returns an instance of ModelBuilder. 
Raises ------ @@ -187,22 +192,29 @@ def load(cls, fname): filepath = Path(str(fname)) idata = az.from_netcdf(filepath) - self = cls( - json.loads(idata.attrs["model_config"]), - json.loads(idata.attrs["sampler_config"]), - idata.fit_data.to_dataframe(), + if "sampler_config" in idata.attrs: + sampler_config = json.loads(idata.attrs["sampler_config"]) + else: + sampler_config = {} + model_builder = cls( + model_config=json.loads(idata.attrs["model_config"]), + sampler_config=sampler_config, + data=idata.fit_data.to_dataframe(), ) - self.idata = idata - if self.id != idata.attrs["id"]: + model_builder.idata = idata + model_builder.build() + if model_builder.id != idata.attrs["id"]: raise ValueError( - f"The file '{fname}' does not contain an inference data of the same model or configuration as '{self._model_type}'" + f"The file '{fname}' does not contain an inference data of the same model or configuration as '{cls._model_type}'" ) - return self + return model_builder - def fit(self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None): + def fit( + self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None + ) -> az.InferenceData: """ - As the name suggests fit can be used to fit a model using the data that is passed as a parameter. + Fit a model using the data passed as a parameter. Sets attrs to inference data of the model. 
Parameter @@ -225,20 +237,22 @@ def fit(self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None if data is not None: self.data = data - self._data_setter(data) - - if self.basic_RVs == []: self.build() + self._data_setter(data) - with self: - self.idata = pm.sample(**self.sample_config) + with self.model: + if self.sampler_config: + self.idata = pm.sample(**self.sampler_config) + else: + self.idata = pm.sample() self.idata.extend(pm.sample_prior_predictive()) self.idata.extend(pm.sample_posterior_predictive(self.idata)) self.idata.attrs["id"] = self.id self.idata.attrs["model_type"] = self._model_type self.idata.attrs["version"] = self.version - self.idata.attrs["sampler_config"] = json.dumps(self.sample_config) + if self.sampler_config: + self.idata.attrs["sampler_config"] = json.dumps(self.sampler_config) self.idata.attrs["model_config"] = json.dumps(self.model_config) self.idata.add_groups(fit_data=self.data.to_xarray()) return self.idata @@ -246,7 +260,8 @@ def fit(self, data: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None def predict( self, data_prediction: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None, - ): + extend_idata: bool = True, + ) -> dict: """ Uses model to predict on unseen data and return point prediction of all the samples @@ -254,6 +269,8 @@ def predict( --------- data_prediction : Dictionary of string and either of numpy array, pandas dataframe or pandas Series It is the data we need to make prediction on using the model. + extend_idata : Boolean determining whether the predictions should be added to inference data object. + Defaults to True. 
Returns ------- @@ -275,7 +292,8 @@ def predict( with self.model: # sample with new input data post_pred = pm.sample_posterior_predictive(self.idata) - + if extend_idata: + self.idata.extend(post_pred) # reshape output post_pred = self._extract_samples(post_pred) for key in post_pred: @@ -286,7 +304,8 @@ def predict( def predict_posterior( self, data_prediction: Dict[str, Union[np.ndarray, pd.DataFrame, pd.Series]] = None, - ): + extend_idata: bool = True, + ) -> Dict[str, np.array]: """ Uses model to predict samples on unseen data. @@ -294,8 +313,8 @@ def predict_posterior( --------- data_prediction : Dictionary of string and either of numpy array, pandas dataframe or pandas Series It is the data we need to make prediction on using the model. - point_estimate : bool - Adds point like estimate used as mean passed as + extend_idata : Boolean determining whether the predictions should be added to inference data object. + Defaults to True. Returns ------- @@ -317,6 +336,8 @@ def predict_posterior( with self.model: # sample with new input data post_pred = pm.sample_posterior_predictive(self.idata) + if extend_idata: + self.idata.extend(post_pred) # reshape output post_pred = self._extract_samples(post_pred) @@ -357,5 +378,4 @@ def id(self) -> str: hasher.update(str(self.model_config.values()).encode()) hasher.update(self.version.encode()) hasher.update(self._model_type.encode()) - # hasher.update(str(self.sample_config.values()).encode()) return hasher.hexdigest()[:16] diff --git a/pymc_experimental/tests/test_model_builder.py b/pymc_experimental/tests/test_model_builder.py index 1dd67e621..21c6890d2 100644 --- a/pymc_experimental/tests/test_model_builder.py +++ b/pymc_experimental/tests/test_model_builder.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- import hashlib import sys import tempfile @@ -29,26 +28,39 @@ class test_ModelBuilder(ModelBuilder): _model_type = "LinearModel" version = "0.1" - def build_model(self, model_config, data=None): - if data is not None: - x = pm.MutableData("x", data["input"].values) - y_data = pm.MutableData("y_data", data["output"].values) - - # prior parameters - a_loc = model_config["a_loc"] - a_scale = model_config["a_scale"] - b_loc = model_config["b_loc"] - b_scale = model_config["b_scale"] - obs_error = model_config["obs_error"] - - # priors - a = pm.Normal("a", a_loc, sigma=a_scale) - b = pm.Normal("b", b_loc, sigma=b_scale) - obs_error = pm.HalfNormal("σ_model_fmc", obs_error) - - # observed data - if data is not None: - y_model = pm.Normal("y_model", a + b * x, obs_error, shape=x.shape, observed=y_data) + def build_model( + self, + model_instance: ModelBuilder, + model_config: dict, + data: dict = None, + sampler_config: dict = None, + ): + model_instance.model_config = model_config + model_instance.data = data + self.model_config = model_config + self.sampler_config = sampler_config + self.data = data + + with pm.Model() as model_instance.model: + if data is not None: + x = pm.MutableData("x", data["input"].values) + y_data = pm.MutableData("y_data", data["output"].values) + + # prior parameters + a_loc = model_config["a_loc"] + a_scale = model_config["a_scale"] + b_loc = model_config["b_loc"] + b_scale = model_config["b_scale"] + obs_error = model_config["obs_error"] + + # priors + a = pm.Normal("a", a_loc, sigma=a_scale) + b = pm.Normal("b", b_loc, sigma=b_scale) + obs_error = pm.HalfNormal("σ_model_fmc", obs_error) + + # observed data + if data is not None: + y_model = pm.Normal("y_model", a + b * x, obs_error, shape=x.shape, observed=y_data) def _data_setter(self, data: pd.DataFrame): with self.model: @@ -57,7 +69,7 @@ def _data_setter(self, data: pd.DataFrame): pm.set_data({"y_data": data["output"].values}) @classmethod - def create_sample_input(cls): + def 
create_sample_input(self): x = np.linspace(start=0, stop=1, num=100) y = 5 * x + 3 y = y + np.random.normal(0, 1, len(x)) @@ -81,14 +93,14 @@ def create_sample_input(cls): return data, model_config, sampler_config @staticmethod - def initial_build_and_fit(check_idata=True): + def initial_build_and_fit(check_idata=True) -> ModelBuilder: data, model_config, sampler_config = test_ModelBuilder.create_sample_input() - model = test_ModelBuilder(model_config, sampler_config, data) - model.fit() + model_builder = test_ModelBuilder(model_config, sampler_config, data) + model_builder.idata = model_builder.fit(data=data) if check_idata: - assert model.idata is not None - assert "posterior" in model.idata.groups() - return model + assert model_builder.idata is not None + assert "posterior" in model_builder.idata.groups() + return model_builder def test_fit(): @@ -105,16 +117,16 @@ def test_fit(): sys.platform == "win32", reason="Permissions for temp files not granted on windows CI." ) def test_save_load(): - model = test_ModelBuilder.initial_build_and_fit(False) + test_builder = test_ModelBuilder.initial_build_and_fit() temp = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False) - model.save(temp.name) - model2 = test_ModelBuilder.load(temp.name) - assert model.idata.groups() == model2.idata.groups() + test_builder.save(temp.name) + test_builder2 = test_ModelBuilder.load(temp.name) + assert test_builder.idata.groups() == test_builder2.idata.groups() x_pred = np.random.uniform(low=0, high=1, size=100) prediction_data = pd.DataFrame({"input": x_pred}) - pred1 = model.predict(prediction_data) - pred2 = model2.predict(prediction_data) + pred1 = test_builder.predict(prediction_data) + pred2 = test_builder2.predict(prediction_data) assert pred1["y_model"].shape == pred2["y_model"].shape temp.close() diff --git a/requirements-docs.txt b/requirements-docs.txt index 0b024fea9..185288a8b 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -1,3 +1,4 @@ 
nbsphinx>=0.4.2 pydata-sphinx-theme>=0.6.3 sphinx>=4 +git+https://github.com/pymc-devs/pymc-sphinx-theme