class ForwardWindowMethods:
    """
    Benchmark rolling aggregations computed over forward-looking windows.

    ``params`` enumerates the container type, window size, dtype and
    aggregation method; ``setup`` builds the rolling object once per
    combination and the ``time_``/``peakmem_`` methods invoke the
    aggregation on it.
    """

    params = (
        ["DataFrame", "Series"],
        [10, 1000],
        ["int", "float"],
        ["median", "mean", "max", "min", "kurt", "sum"],
    )
    param_names = ["constructor", "window_size", "dtype", "method"]

    def setup(self, constructor, window_size, dtype, method):
        N = 10 ** 5
        # Scale before casting: samples from [0, 1) all truncate to 0 under
        # the "int" dtype, which would benchmark a constant array.
        arr = (100 * np.random.random(N)).astype(dtype)
        indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=window_size)
        self.roll = getattr(pd, constructor)(arr).rolling(window=indexer)

    def time_rolling(self, constructor, window_size, dtype, method):
        # Look the aggregation up by name so one method covers all of them.
        getattr(self.roll, method)()

    def peakmem_rolling(self, constructor, window_size, dtype, method):
        getattr(self.roll, method)()
In these cases it can be useful to perform forward-looking rolling window computations. +The :class:`FixedForwardWindowIndexer <pandas.api.indexers.FixedForwardWindowIndexer>` class is available for this purpose. +This :class:`BaseIndexer <pandas.api.indexers.BaseIndexer>` subclass implements a closed fixed-width +forward-looking rolling window, and we can use it as follows: + +.. ipython:: python + + from pandas.api.indexers import FixedForwardWindowIndexer + indexer = FixedForwardWindowIndexer(window_size=2) + df.rolling(indexer, min_periods=1).sum() .. _stats.rolling_window.endpoints: diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 6f2b9b4f946c7..310dd0be4cde3 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -104,6 +104,7 @@ Other API changes - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) - Using a :func:`pandas.api.indexers.BaseIndexer` with ``std``, ``var``, ``count``, ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) - Using a :func:`pandas.api.indexers.BaseIndexer` with ``min``, ``max`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) +- Added a :class:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations. 
class FixedForwardWindowIndexer(BaseIndexer):
    """
    Creates window boundaries for fixed-length windows that include the
    current row.

    Each window starts at the current row and spans ``window_size`` rows
    forward. Windows that would extend past the last row are truncated at
    the end of the data, so a ``window_size`` larger than the number of
    rows simply yields windows reaching to the final row.

    Examples
    --------
    >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
    >>> df.rolling(window=indexer, min_periods=1).sum()
         B
    0  1.0
    1  3.0
    2  2.0
    3  4.0
    4  4.0
    """

    @Appender(get_window_bounds_doc)
    def get_window_bounds(
        self,
        num_values: int = 0,
        min_periods: Optional[int] = None,
        center: Optional[bool] = None,
        closed: Optional[str] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:

        # Centering and explicit closure are rejected for forward windows.
        if center:
            raise ValueError("Forward-looking windows can't have center=True")
        if closed is not None:
            raise ValueError(
                "Forward-looking windows don't support setting the closed argument"
            )

        start = np.arange(num_values, dtype="int64")
        # Cap each end at num_values instead of slicing and padding: the
        # slice-based form returned an ``end`` whose length differed from
        # ``start`` when window_size was 0 or exceeded num_values.
        end = np.minimum(start + self.window_size, num_values)

        return start, end
pandas.core.window.indexers import ExpandingIndexer @@ -105,19 +105,21 @@ def get_window_bounds(self, num_values, min_periods, center, closed): ) def test_rolling_forward_window(constructor, func, alt_func, expected): # GH 32865 - class ForwardIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): - start = np.arange(num_values, dtype="int64") - end_s = start[: -self.window_size] + self.window_size - end_e = np.full(self.window_size, num_values, dtype="int64") - end = np.concatenate([end_s, end_e]) - - return start, end - values = np.arange(10) values[5] = 100.0 - indexer = ForwardIndexer(window_size=3) + indexer = FixedForwardWindowIndexer(window_size=3) + + match = "Forward-looking windows can't have center=True" + with pytest.raises(ValueError, match=match): + rolling = constructor(values).rolling(window=indexer, center=True) + result = getattr(rolling, func)() + + match = "Forward-looking windows don't support setting the closed argument" + with pytest.raises(ValueError, match=match): + rolling = constructor(values).rolling(window=indexer, closed="right") + result = getattr(rolling, func)() + rolling = constructor(values).rolling(window=indexer, min_periods=2) result = getattr(rolling, func)() expected = constructor(expected)