diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 6ebf610587d30..8e935b7c05300 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -8,22 +8,6 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.indexes.api import MultiIndex -from pandas.core.shared_docs import _shared_docs - -_shared_docs = dict(**_shared_docs) -_doc_template = """ - Returns - ------- - Series or DataFrame - Return type is determined by the caller. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrame data. - pandas.Series.%(func_name)s : Similar method for Series. - pandas.DataFrame.%(func_name)s : Similar method for DataFrame. -""" def flex_binary_moment(arg1, arg2, f, pairwise=False): diff --git a/pandas/core/window/doc.py b/pandas/core/window/doc.py new file mode 100644 index 0000000000000..7e3f7895c0125 --- /dev/null +++ b/pandas/core/window/doc.py @@ -0,0 +1,119 @@ +"""Any shareable docstring components for rolling/expanding/ewm""" +from textwrap import dedent + +from pandas.core.shared_docs import _shared_docs + +_shared_docs = dict(**_shared_docs) + + +def create_section_header(header: str) -> str: + """Create numpydoc section header""" + return "\n".join((header, "-" * len(header))) + "\n" + + +template_header = "Calculate the {window_method} {aggregation_description}.\n\n" + +template_returns = dedent( + """ + Series or DataFrame + Return type is the same as the original object.\n + """ +).replace("\n", "", 1) + +template_see_also = dedent( + """ + pandas.Series.{window_method} : Calling {window_method} with Series data. + pandas.DataFrame.{window_method} : Calling {window_method} with DataFrames. + pandas.Series.{agg_method} : Aggregating {agg_method} for Series. + pandas.DataFrame.{agg_method} : Aggregating {agg_method} for DataFrame.\n + """ +).replace("\n", "", 1) + +args_compat = dedent( + """ + *args + For NumPy compatibility and will not have an effect on the result.\n + """ +).replace("\n", "", 1) + +kwargs_compat = dedent( + """ + **kwargs + For NumPy compatibility and will not have an effect on the result.\n + """ +).replace("\n", "", 1) + +kwargs_scipy = dedent( + """ + **kwargs + Keyword arguments to configure the ``SciPy`` weighted window type.\n + """ +).replace("\n", "", 1) + +window_apply_parameters = dedent( + """ + func : function + Must produce a single value from an ndarray input if ``raw=True`` + or a single value from a Series if ``raw=False``. Can also accept a + Numba JIT function with ``engine='numba'`` specified. + + .. versionchanged:: 1.0.0 + + raw : bool, default None + * ``False`` : passes each row or column as a Series to the + function. + * ``True`` : the passed function will receive ndarray + objects instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + + engine : str, default None + * ``'cython'`` : Runs rolling apply through C-extensions from cython. + * ``'numba'`` : Runs rolling apply through JIT compiled code from numba. + Only available when ``raw`` is set to ``True``. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.0.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be + applied to both the ``func`` and the ``apply`` rolling aggregation. + + .. versionadded:: 1.0.0 + + args : tuple, default None + Positional arguments to be passed into func. + + kwargs : dict, default None + Keyword arguments to be passed into func.\n + """ +).replace("\n", "", 1) + +numba_notes = ( + "See :ref:`window.numba_engine` for extended documentation " + "and performance considerations for the Numba engine.\n" +) + +window_agg_numba_parameters = dedent( + """ + engine : str, default None + * ``'cython'`` : Runs the operation through C-extensions from cython. + * ``'numba'`` : Runs the operation through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` + + .. versionadded:: 1.3.0\n + """ +).replace("\n", "", 1) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 6c16ff3edc1d2..2948216e200de 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -12,18 +12,22 @@ import pandas._libs.window.aggregations as window_aggregations from pandas._typing import FrameOrSeries, TimedeltaConvertibleTypes from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution, doc +from pandas.util._decorators import doc from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.core.dtypes.missing import isna import pandas.core.common as common from pandas.core.util.numba_ import maybe_use_numba -from pandas.core.window.common import ( - _doc_template, +from pandas.core.window.common import flex_binary_moment, zsqrt +from pandas.core.window.doc import ( _shared_docs, - flex_binary_moment, - zsqrt, + args_compat, + create_section_header, + kwargs_compat, + template_header, + template_returns, + template_see_also, ) from pandas.core.window.indexers import ( BaseIndexer, @@ -37,16 +41,6 @@ from pandas import Series -_bias_template = """ - Parameters - ---------- - bias : bool, default False - Use a standard estimation bias correction. - *args, **kwargs - Arguments and keyword arguments to be passed into func. -""" - - def get_center_of_mass( comass: Optional[float], span: Optional[float], @@ -283,37 +277,33 @@ def _get_window_indexer(self) -> BaseIndexer: """ return ExponentialMovingWindowIndexer() - _agg_see_also_doc = dedent( - """ - See Also - -------- - pandas.DataFrame.rolling.aggregate - """ - ) - - _agg_examples_doc = dedent( - """ - Examples - -------- - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - >>> df - A B C - 0 1 4 7 - 1 2 5 8 - 2 3 6 9 - - >>> df.ewm(alpha=0.5).mean() - A B C - 0 1.000000 4.000000 7.000000 - 1 1.666667 4.666667 7.666667 - 2 2.428571 5.428571 8.428571 - """ - ) - @doc( _shared_docs["aggregate"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.rolling.aggregate + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 1.000000 4.000000 7.000000 + 1 1.666667 4.666667 7.666667 + 2 2.428571 5.428571 8.428571 + """ + ), klass="Series/Dataframe", axis="", ) @@ -322,17 +312,20 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate - @Substitution(name="ewm", func_name="mean") - @Appender(_doc_template) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) mean", + agg_method="mean", + ) def mean(self, *args, **kwargs): - """ - Exponential weighted moving average. - - Parameters - ---------- - *args, **kwargs - Arguments and keyword arguments to be passed into func. - """ nv.validate_window_func("mean", args, kwargs) if self.times is not None: window_func = window_aggregations.ewma_time @@ -351,13 +344,26 @@ def mean(self, *args, **kwargs): ) return self._apply(window_func) - @Substitution(name="ewm", func_name="std") - @Appender(_doc_template) - @Appender(_bias_template) - def std(self, bias: bool = False, *args, **kwargs): - """ - Exponential weighted moving stddev. + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + bias : bool, default False + Use a standard estimation bias correction. """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) standard deviation", + agg_method="std", + ) + def std(self, bias: bool = False, *args, **kwargs): nv.validate_window_func("std", args, kwargs) return zsqrt(self.var(bias=bias, **kwargs)) @@ -372,13 +378,26 @@ def vol(self, bias: bool = False, *args, **kwargs): ) return self.std(bias, *args, **kwargs) - @Substitution(name="ewm", func_name="var") - @Appender(_doc_template) - @Appender(_bias_template) - def var(self, bias: bool = False, *args, **kwargs): - """ - Exponential weighted moving variance. + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + bias : bool, default False + Use a standard estimation bias correction. """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) variance", + agg_method="var", + ) + def var(self, bias: bool = False, *args, **kwargs): nv.validate_window_func("var", args, kwargs) window_func = window_aggregations.ewmcov window_func = partial( @@ -394,20 +413,11 @@ def var_func(values, begin, end, min_periods): return self._apply(var_func) - @Substitution(name="ewm", func_name="cov") - @Appender(_doc_template) - def cov( - self, - other: Optional[Union[np.ndarray, FrameOrSeries]] = None, - pairwise: Optional[bool] = None, - bias: bool = False, - **kwargs, - ): - """ - Exponential weighted sample covariance. - - Parameters - ---------- + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ other : Series, DataFrame, or ndarray, optional If not supplied then will default to self and produce pairwise output. @@ -420,9 +430,24 @@ def cov( observations will be used. bias : bool, default False Use a standard estimation bias correction. - **kwargs - Keyword arguments to be passed into func. """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) sample covariance", + agg_method="cov", + ) + def cov( + self, + other: Optional[Union[np.ndarray, FrameOrSeries]] = None, + pairwise: Optional[bool] = None, + bias: bool = False, + **kwargs, + ): if other is None: other = self._selected_obj # only default unset @@ -449,19 +474,11 @@ def _get_cov(X, Y): self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise) ) - @Substitution(name="ewm", func_name="corr") - @Appender(_doc_template) - def corr( - self, - other: Optional[Union[np.ndarray, FrameOrSeries]] = None, - pairwise: Optional[bool] = None, - **kwargs, - ): - """ - Exponential weighted sample correlation. - - Parameters - ---------- + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ other : Series, DataFrame, or ndarray, optional If not supplied then will default to self and produce pairwise output. @@ -472,9 +489,23 @@ def corr( output will be a MultiIndex DataFrame in the case of DataFrame inputs. In the case of missing elements, only complete pairwise observations will be used. - **kwargs - Keyword arguments to be passed into func. """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="ewm", + aggregation_description="(exponential weighted moment) sample correlation", + agg_method="corr", + ) + def corr( + self, + other: Optional[Union[np.ndarray, FrameOrSeries]] = None, + pairwise: Optional[bool] = None, + **kwargs, + ): if other is None: other = self._selected_obj # only default unset diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index d215fd1d84ca5..f91441de41448 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -5,9 +5,20 @@ from pandas._typing import FrameOrSeries from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution, doc - -from pandas.core.window.common import _doc_template, _shared_docs +from pandas.util._decorators import doc + +from pandas.core.window.doc import ( + _shared_docs, + args_compat, + create_section_header, + kwargs_compat, + numba_notes, + template_header, + template_returns, + template_see_also, + window_agg_numba_parameters, + window_apply_parameters, +) from pandas.core.window.indexers import BaseIndexer, ExpandingIndexer, GroupbyIndexer from pandas.core.window.rolling import BaseWindowGroupby, RollingAndExpandingMixin @@ -82,38 +93,34 @@ def _get_window_indexer(self) -> BaseIndexer: """ return ExpandingIndexer() - _agg_see_also_doc = dedent( - """ - See Also - -------- - pandas.DataFrame.aggregate : Similar DataFrame method. - pandas.Series.aggregate : Similar Series method. - """ - ) - - _agg_examples_doc = dedent( - """ - Examples - -------- - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - >>> df - A B C - 0 1 4 7 - 1 2 5 8 - 2 3 6 9 - - >>> df.ewm(alpha=0.5).mean() - A B C - 0 1.000000 4.000000 7.000000 - 1 1.666667 4.666667 7.666667 - 2 2.428571 5.428571 8.428571 - """ - ) - @doc( _shared_docs["aggregate"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.aggregate : Similar DataFrame method. + pandas.Series.aggregate : Similar Series method. + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 1.000000 4.000000 7.000000 + 1 1.666667 4.666667 7.666667 + 2 2.428571 5.428571 8.428571 + """ + ), klass="Series/Dataframe", axis="", ) @@ -122,13 +129,31 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate - @Substitution(name="expanding") - @Appender(_shared_docs["count"]) + @doc( + template_header, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="count of non NaN observations", + agg_method="count", + ) def count(self): return super().count() - @Substitution(name="expanding") - @Appender(_shared_docs["apply"]) + @doc( + template_header, + create_section_header("Parameters"), + window_apply_parameters, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="custom aggregation function", + agg_method="apply", + ) def apply( self, func: Callable[..., Any], @@ -147,92 +172,328 @@ def apply( kwargs=kwargs, ) - @Substitution(name="expanding") - @Appender(_shared_docs["sum"]) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + window_method="expanding", + aggregation_description="sum", + agg_method="sum", + ) def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("sum", args, kwargs) return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="expanding", func_name="max") - @Appender(_doc_template) - @Appender(_shared_docs["max"]) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + window_method="expanding", + aggregation_description="maximum", + agg_method="max", + ) def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("max", args, kwargs) return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="expanding") - @Appender(_shared_docs["min"]) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + window_method="expanding", + aggregation_description="minimum", + agg_method="min", + ) def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("min", args, kwargs) return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="expanding") - @Appender(_shared_docs["mean"]) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + window_method="expanding", + aggregation_description="mean", + agg_method="mean", + ) def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("mean", args, kwargs) return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="expanding") - @Appender(_shared_docs["median"]) + @doc( + template_header, + create_section_header("Parameters"), + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + window_method="expanding", + aggregation_description="median", + agg_method="median", + ) def median(self, engine=None, engine_kwargs=None, **kwargs): return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="expanding", versionadded="") - @Appender(_shared_docs["std"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.std : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.std` is different + than the default ``ddof`` of 0 in :func:`numpy.std`. + + A minimum of one period is required for the rolling calculation.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + + >>> s.expanding(3).std() + 0 NaN + 1 NaN + 2 0.577350 + 3 0.957427 + 4 0.894427 + 5 0.836660 + 6 0.786796 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="standard deviation", + agg_method="std", + ) def std(self, ddof: int = 1, *args, **kwargs): nv.validate_expanding_func("std", args, kwargs) return super().std(ddof=ddof, **kwargs) - @Substitution(name="expanding", versionadded="") - @Appender(_shared_docs["var"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.var : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.var` is different + than the default ``ddof`` of 0 in :func:`numpy.var`. + + A minimum of one period is required for the rolling calculation.\n + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + + >>> s.expanding(3).var() + 0 NaN + 1 NaN + 2 0.333333 + 3 0.916667 + 4 0.800000 + 5 0.700000 + 6 0.619048 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="variance", + agg_method="var", + ) def var(self, ddof: int = 1, *args, **kwargs): nv.validate_expanding_func("var", args, kwargs) return super().var(ddof=ddof, **kwargs) - @Substitution(name="expanding") - @Appender(_shared_docs["sem"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements.\n + """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + "A minimum of one period is required for the calculation.\n", + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([0, 1, 2, 3]) + + >>> s.expanding().sem() + 0 NaN + 1 0.707107 + 2 0.707107 + 3 0.745356 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="standard error of mean", + agg_method="sem", + ) def sem(self, ddof: int = 1, *args, **kwargs): return super().sem(ddof=ddof, **kwargs) - @Substitution(name="expanding", func_name="skew") - @Appender(_doc_template) - @Appender(_shared_docs["skew"]) + @doc( + template_header, + create_section_header("Parameters"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.skew : Third moment of a probability density.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of three periods is required for the rolling calculation.\n", + window_method="expanding", + aggregation_description="unbiased skewness", + agg_method="skew", + ) def skew(self, **kwargs): return super().skew(**kwargs) - _agg_doc = dedent( + @doc( + template_header, + create_section_header("Parameters"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.kurtosis : Reference SciPy method.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of four periods is required for the calculation.\n", + create_section_header("Examples"), + dedent( + """ + The example below will show a rolling calculation with a window size of + four matching the equivalent function call using `scipy.stats`. + + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}") + -1.200000 + >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}") + 4.999874 + >>> s = pd.Series(arr) + >>> s.expanding(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 4.999874 + dtype: float64 """ - Examples - -------- - - The example below will show an expanding calculation with a window size of - four matching the equivalent function call using `scipy.stats`. - - >>> arr = [1, 2, 3, 4, 999] - >>> import scipy.stats - >>> print(f"{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}") - -1.200000 - >>> print(f"{scipy.stats.kurtosis(arr, bias=False):.6f}") - 4.999874 - >>> s = pd.Series(arr) - >>> s.expanding(4).kurt() - 0 NaN - 1 NaN - 2 NaN - 3 -1.200000 - 4 4.999874 - dtype: float64 - """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="Fisher's definition of kurtosis without bias", + agg_method="kurt", ) - - @Appender(_agg_doc) - @Substitution(name="expanding") - @Appender(_shared_docs["kurt"]) def kurt(self, **kwargs): return super().kurt(**kwargs) - @Substitution(name="expanding") - @Appender(_shared_docs["quantile"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + quantile : float + Quantile to compute. 0 <= quantile <= 1. + interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="quantile", + agg_method="quantile", + ) def quantile( self, quantile, @@ -245,9 +506,35 @@ def quantile( **kwargs, ) - @Substitution(name="expanding", func_name="cov") - @Appender(_doc_template) - @Appender(_shared_docs["cov"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="expanding", + aggregation_description="sample covariance", + agg_method="cov", + ) def cov( self, other: Optional[Union[np.ndarray, FrameOrSeries]] = None, @@ -257,8 +544,62 @@ def cov( ): return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) - @Substitution(name="expanding") - @Appender(_shared_docs["corr"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + dedent( + """ + cov : Similar method to calculate covariance. + numpy.corrcoef : NumPy Pearson's correlation calculation. + """ + ).replace("\n", "", 1), + template_see_also, + create_section_header("Notes"), + dedent( + """ + This function uses Pearson's definition of correlation + (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). + + When `other` is not specified, the output will be self correlation (e.g. + all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` + set to `True`. + + Function will return ``NaN`` for correlations of equal valued sequences; + this is the result of a 0/0 division error. + + When `pairwise` is set to `False`, only matching columns between `self` and + `other` will be used. + + When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame + with the original index on the first level, and the `other` DataFrame + columns on the second level. + + In the case of missing elements, only complete pairwise observations + will be used. + """ + ).replace("\n", "", 1), + window_method="expanding", + aggregation_description="correlation", + agg_method="corr", + ) def corr( self, other: Optional[Union[np.ndarray, FrameOrSeries]] = None, diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 7b438c51b9ac7..06302fd49feb9 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -29,7 +29,7 @@ from pandas._typing import ArrayLike, Axis, FrameOrSeries, FrameOrSeriesUnion from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution, doc +from pandas.util._decorators import doc from pandas.core.dtypes.common import ( ensure_float64, @@ -54,11 +54,19 @@ from pandas.core.groupby.base import GotItemMixin, ShallowMixin from pandas.core.indexes.api import Index, MultiIndex from pandas.core.util.numba_ import NUMBA_FUNC_CACHE, maybe_use_numba -from pandas.core.window.common import ( - _doc_template, +from pandas.core.window.common import flex_binary_moment, zsqrt +from pandas.core.window.doc import ( _shared_docs, - flex_binary_moment, - zsqrt, + args_compat, + create_section_header, + kwargs_compat, + kwargs_scipy, + numba_notes, + template_header, + template_returns, + template_see_also, + window_agg_numba_parameters, + window_apply_parameters, ) from pandas.core.window.indexers import ( BaseIndexer, @@ -486,249 +494,6 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate - _shared_docs["sum"] = dedent( - """ - Calculate %(name)s sum of given DataFrame or Series. - - Parameters - ---------- - *args, **kwargs - For compatibility with other %(name)s methods. Has no effect - on the computed value. - - Returns - ------- - Series or DataFrame - Same type as the input, with the same index, containing the - %(name)s sum. - - See Also - -------- - pandas.Series.sum : Reducing sum for Series. - pandas.DataFrame.sum : Reducing sum for DataFrame. - - Examples - -------- - >>> s = pd.Series([1, 2, 3, 4, 5]) - >>> s - 0 1 - 1 2 - 2 3 - 3 4 - 4 5 - dtype: int64 - - >>> s.rolling(3).sum() - 0 NaN - 1 NaN - 2 6.0 - 3 9.0 - 4 12.0 - dtype: float64 - - >>> s.expanding(3).sum() - 0 NaN - 1 NaN - 2 6.0 - 3 10.0 - 4 15.0 - dtype: float64 - - >>> s.rolling(3, center=True).sum() - 0 NaN - 1 6.0 - 2 9.0 - 3 12.0 - 4 NaN - dtype: float64 - - For DataFrame, each %(name)s sum is computed column-wise. - - >>> df = pd.DataFrame({"A": s, "B": s ** 2}) - >>> df - A B - 0 1 1 - 1 2 4 - 2 3 9 - 3 4 16 - 4 5 25 - - >>> df.rolling(3).sum() - A B - 0 NaN NaN - 1 NaN NaN - 2 6.0 14.0 - 3 9.0 29.0 - 4 12.0 50.0 - """ - ) - - _shared_docs["mean"] = dedent( - """ - Calculate the %(name)s mean of the values. - - Parameters - ---------- - *args - Under Review. - **kwargs - Under Review. - - Returns - ------- - Series or DataFrame - Returned object type is determined by the caller of the %(name)s - calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrames. - pandas.Series.mean : Equivalent method for Series. - pandas.DataFrame.mean : Equivalent method for DataFrame. - - Examples - -------- - The below examples will show rolling mean calculations with window sizes of - two and three, respectively. - - >>> s = pd.Series([1, 2, 3, 4]) - >>> s.rolling(2).mean() - 0 NaN - 1 1.5 - 2 2.5 - 3 3.5 - dtype: float64 - - >>> s.rolling(3).mean() - 0 NaN - 1 NaN - 2 2.0 - 3 3.0 - dtype: float64 - """ - ) - - _shared_docs["var"] = dedent( - """ - Calculate unbiased %(name)s variance. - %(versionadded)s - Normalized by N-1 by default. This can be changed using the `ddof` - argument. - - Parameters - ---------- - ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - *args, **kwargs - For NumPy compatibility. No additional arguments are used. - - Returns - ------- - Series or DataFrame - Returns the same object type as the caller of the %(name)s calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrames. - pandas.Series.var : Equivalent method for Series. - pandas.DataFrame.var : Equivalent method for DataFrame. - numpy.var : Equivalent method for Numpy array. - - Notes - ----- - The default `ddof` of 1 used in :meth:`Series.var` is different than the - default `ddof` of 0 in :func:`numpy.var`. - - A minimum of 1 period is required for the rolling calculation. - - Examples - -------- - >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) - >>> s.rolling(3).var() - 0 NaN - 1 NaN - 2 0.333333 - 3 1.000000 - 4 1.000000 - 5 1.333333 - 6 0.000000 - dtype: float64 - - >>> s.expanding(3).var() - 0 NaN - 1 NaN - 2 0.333333 - 3 0.916667 - 4 0.800000 - 5 0.700000 - 6 0.619048 - dtype: float64 - """ - ) - - _shared_docs["std"] = dedent( - """ - Calculate %(name)s standard deviation. - %(versionadded)s - Normalized by N-1 by default. This can be changed using the `ddof` - argument. - - Parameters - ---------- - ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - *args, **kwargs - For NumPy compatibility. No additional arguments are used. - - Returns - ------- - Series or DataFrame - Returns the same object type as the caller of the %(name)s calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrames. - pandas.Series.std : Equivalent method for Series. - pandas.DataFrame.std : Equivalent method for DataFrame. - numpy.std : Equivalent method for Numpy array. - - Notes - ----- - The default `ddof` of 1 used in Series.std is different than the default - `ddof` of 0 in numpy.std. - - A minimum of one period is required for the rolling calculation. - - Examples - -------- - >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) - >>> s.rolling(3).std() - 0 NaN - 1 NaN - 2 0.577350 - 3 1.000000 - 4 1.000000 - 5 1.154701 - 6 0.000000 - dtype: float64 - - >>> s.expanding(3).std() - 0 NaN - 1 NaN - 2 0.577350 - 3 0.957427 - 4 0.894427 - 5 0.836660 - 6 0.786796 - dtype: float64 - """ - ) - def dispatch(name: str, *args, **kwargs): """ @@ -1114,38 +879,34 @@ def calc(x): return self._apply_blockwise(homogeneous_func, name) - _agg_see_also_doc = dedent( - """ - See Also - -------- - pandas.DataFrame.aggregate : Similar DataFrame method. - pandas.Series.aggregate : Similar Series method. - """ - ) - - _agg_examples_doc = dedent( - """ - Examples - -------- - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - >>> df - A B C - 0 1 4 7 - 1 2 5 8 - 2 3 6 9 - - >>> df.rolling(2, win_type="boxcar").agg("mean") - A B C - 0 NaN NaN NaN - 1 1.5 4.5 7.5 - 2 2.5 5.5 8.5 - """ - ) - @doc( _shared_docs["aggregate"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, + see_also=dedent( + """ + See Also + -------- + pandas.DataFrame.aggregate : Similar DataFrame method. + pandas.Series.aggregate : Similar Series method. + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.rolling(2, win_type="boxcar").agg("mean") + A B C + 0 NaN NaN NaN + 1 1.5 4.5 7.5 + 2 2.5 5.5 8.5 + """ + ), klass="Series/DataFrame", axis="", ) @@ -1160,144 +921,82 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate - @Substitution(name="window") - @Appender(_shared_docs["sum"]) + @doc( + template_header, + create_section_header("Parameters"), + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window sum", + agg_method="sum", + ) def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = window_aggregations.roll_weighted_sum return self._apply(window_func, name="sum", **kwargs) - @Substitution(name="window") - @Appender(_shared_docs["mean"]) + @doc( + template_header, + create_section_header("Parameters"), + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window mean", + agg_method="mean", + ) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func = window_aggregations.roll_weighted_mean return self._apply(window_func, name="mean", **kwargs) - @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") - @Appender(_shared_docs["var"]) + @doc( + template_header, + ".. versionadded:: 1.0.0 \n\n", + create_section_header("Parameters"), + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window variance", + agg_method="var", + ) def var(self, ddof: int = 1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) window_func = partial(window_aggregations.roll_weighted_var, ddof=ddof) kwargs.pop("name", None) return self._apply(window_func, name="var", **kwargs) - @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") - @Appender(_shared_docs["std"]) + @doc( + template_header, + ".. versionadded:: 1.0.0 \n\n", + create_section_header("Parameters"), + kwargs_scipy, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="weighted window standard deviation", + agg_method="std", + ) def std(self, ddof: int = 1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) return zsqrt(self.var(ddof=ddof, name="std", **kwargs)) class RollingAndExpandingMixin(BaseWindow): - - _shared_docs["count"] = dedent( - r""" - The %(name)s count of any non-NaN observations inside the window. - - Returns - ------- - Series or DataFrame - Returned object type is determined by the caller of the %(name)s - calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrames. - pandas.DataFrame.count : Count of the full DataFrame. - - Examples - -------- - >>> s = pd.Series([2, 3, np.nan, 10]) - >>> s.rolling(2).count() - 0 1.0 - 1 2.0 - 2 1.0 - 3 1.0 - dtype: float64 - >>> s.rolling(3).count() - 0 1.0 - 1 2.0 - 2 2.0 - 3 2.0 - dtype: float64 - >>> s.rolling(4).count() - 0 1.0 - 1 2.0 - 2 2.0 - 3 3.0 - dtype: float64 - """ - ) - def count(self): window_func = window_aggregations.roll_sum return self._apply(window_func, name="count") - _shared_docs["apply"] = dedent( - r""" - Apply an arbitrary function to each %(name)s window. - - Parameters - ---------- - func : function - Must produce a single value from an ndarray input if ``raw=True`` - or a single value from a Series if ``raw=False``. Can also accept a - Numba JIT function with ``engine='numba'`` specified. - - .. versionchanged:: 1.0.0 - - raw : bool, default None - * ``False`` : passes each row or column as a Series to the - function. - * ``True`` : the passed function will receive ndarray - objects instead. - If you are just applying a NumPy reduction function this will - achieve much better performance. - - engine : str, default None - * ``'cython'`` : Runs rolling apply through C-extensions from cython. - * ``'numba'`` : Runs rolling apply through JIT compiled code from numba. - Only available when ``raw`` is set to ``True``. - * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - - .. versionadded:: 1.0.0 - - engine_kwargs : dict, default None - * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` - * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` - and ``parallel`` dictionary keys. The values must either be ``True`` or - ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is - ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be - applied to both the ``func`` and the ``apply`` rolling aggregation. - - .. versionadded:: 1.0.0 - - args : tuple, default None - Positional arguments to be passed into func. - kwargs : dict, default None - Keyword arguments to be passed into func. - - Returns - ------- - Series or DataFrame - Return type is determined by the caller. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrame data. - pandas.Series.apply : Similar method for Series. - pandas.DataFrame.apply : Similar method for DataFrame. - - Notes - ----- - See :ref:`window.numba_engine` for extended documentation and performance - considerations for the Numba engine. - """ - ) - def apply( self, func: Callable[..., Any], @@ -1383,34 +1082,6 @@ def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): window_func = window_aggregations.roll_sum return self._apply(window_func, name="sum", **kwargs) - _shared_docs["max"] = dedent( - """ - Calculate the %(name)s maximum. - - Parameters - ---------- - engine : str, default None - * ``'cython'`` : Runs rolling max through C-extensions from cython. - * ``'numba'`` : Runs rolling max through JIT compiled code from numba. - * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - - .. versionadded:: 1.3.0 - - engine_kwargs : dict, default None - * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` - * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` - and ``parallel`` dictionary keys. The values must either be ``True`` or - ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is - ``{'nopython': True, 'nogil': False, 'parallel': False}`` - - .. versionadded:: 1.3.0 - - **kwargs - For compatibility with other %(name)s methods. Has no effect on - the result. - """ - ) - def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("max", args, kwargs) if maybe_use_numba(engine): @@ -1428,60 +1099,6 @@ def max(self, *args, engine=None, engine_kwargs=None, **kwargs): window_func = window_aggregations.roll_max return self._apply(window_func, name="max", **kwargs) - _shared_docs["min"] = dedent( - """ - Calculate the %(name)s minimum. - - Parameters - ---------- - engine : str, default None - * ``'cython'`` : Runs rolling min through C-extensions from cython. - * ``'numba'`` : Runs rolling min through JIT compiled code from numba. - * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - - .. versionadded:: 1.3.0 - - engine_kwargs : dict, default None - * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` - * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` - and ``parallel`` dictionary keys. The values must either be ``True`` or - ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is - ``{'nopython': True, 'nogil': False, 'parallel': False}`` - - .. versionadded:: 1.3.0 - - **kwargs - For compatibility with other %(name)s methods. Has no effect on - the result. - - Returns - ------- - Series or DataFrame - Returned object type is determined by the caller of the %(name)s - calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with a Series. - pandas.DataFrame.%(name)s : Calling object with a DataFrame. - pandas.Series.min : Similar method for Series. - pandas.DataFrame.min : Similar method for DataFrame. - - Examples - -------- - Performing a rolling minimum with a window size of 3. - - >>> s = pd.Series([4, 3, 5, 2, 6]) - >>> s.rolling(3).min() - 0 NaN - 1 NaN - 2 3.0 - 3 2.0 - 4 2.0 - dtype: float64 - """ - ) - def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("min", args, kwargs) if maybe_use_numba(engine): @@ -1516,59 +1133,6 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): window_func = window_aggregations.roll_mean return self._apply(window_func, name="mean", **kwargs) - _shared_docs["median"] = dedent( - """ - Calculate the %(name)s median. - - Parameters - ---------- - engine : str, default None - * ``'cython'`` : Runs rolling median through C-extensions from cython. - * ``'numba'`` : Runs rolling median through JIT compiled code from numba. - * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - - .. versionadded:: 1.3.0 - - engine_kwargs : dict, default None - * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` - * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` - and ``parallel`` dictionary keys. The values must either be ``True`` or - ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is - ``{'nopython': True, 'nogil': False, 'parallel': False}`` - - .. versionadded:: 1.3.0 - - **kwargs - For compatibility with other %(name)s methods. Has no effect - on the computed result. - - Returns - ------- - Series or DataFrame - Returned type is the same as the original object. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrames. - pandas.Series.median : Equivalent method for Series. - pandas.DataFrame.median : Equivalent method for DataFrame. - - Examples - -------- - Compute the rolling median of a series with a window size of 3. - - >>> s = pd.Series([0, 1, 2, 3, 4]) - >>> s.rolling(3).median() - 0 NaN - 1 NaN - 2 1.0 - 3 2.0 - 4 3.0 - dtype: float64 - """ - ) - def median(self, engine=None, engine_kwargs=None, **kwargs): if maybe_use_numba(engine): if self.method == "table": @@ -1607,18 +1171,6 @@ def var(self, ddof: int = 1, *args, **kwargs): **kwargs, ) - _shared_docs[ - "skew" - ] = """ - Unbiased %(name)s skewness. - - Parameters - ---------- - **kwargs - For compatibility with other %(name)s methods. Has no effect on - the result. - """ - def skew(self, **kwargs): window_func = window_aggregations.roll_skew return self._apply( @@ -1627,92 +1179,9 @@ def skew(self, **kwargs): **kwargs, ) - _shared_docs["kurt"] = dedent( - """ - Calculate unbiased %(name)s kurtosis. - - This function uses Fisher's definition of kurtosis without bias. - - Parameters - ---------- - **kwargs - For compatibility with other %(name)s methods. Has no effect on - the result. - - Returns - ------- - Series or DataFrame - Returned object type is determined by the caller of the %(name)s - calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrames. - pandas.Series.kurt : Equivalent method for Series. - pandas.DataFrame.kurt : Equivalent method for DataFrame. - scipy.stats.skew : Third moment of a probability density. - scipy.stats.kurtosis : Reference SciPy method. - - Notes - ----- - A minimum of 4 periods is required for the %(name)s calculation. - """ - ) - def sem(self, ddof: int = 1, *args, **kwargs): return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) - _shared_docs["sem"] = dedent( - """ - Compute %(name)s standard error of mean. - - Parameters - ---------- - - ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - - *args, **kwargs - For NumPy compatibility. No additional arguments are used. - - Returns - ------- - Series or DataFrame - Returned object type is determined by the caller of the %(name)s - calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrames. - pandas.Series.sem : Equivalent method for Series. - pandas.DataFrame.sem : Equivalent method for DataFrame. - - Notes - ----- - A minimum of one period is required for the rolling calculation. - - Examples - -------- - >>> s = pd.Series([0, 1, 2, 3]) - >>> s.rolling(2, min_periods=1).sem() - 0 NaN - 1 0.707107 - 2 0.707107 - 3 0.707107 - dtype: float64 - - >>> s.expanding().sem() - 0 NaN - 1 0.707107 - 2 0.707107 - 3 0.745356 - dtype: float64 - """ - ) - def kurt(self, **kwargs): window_func = window_aggregations.roll_kurt return self._apply( @@ -1721,78 +1190,6 @@ def kurt(self, **kwargs): **kwargs, ) - _shared_docs["quantile"] = dedent( - """ - Calculate the %(name)s quantile. - - Parameters - ---------- - quantile : float - Quantile to compute. 0 <= quantile <= 1. - - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to use, - when the desired quantile lies between two data points `i` and `j`: - - * linear: `i + (j - i) * fraction`, where `fraction` is the - fractional part of the index surrounded by `i` and `j`. - * lower: `i`. - * higher: `j`. - * nearest: `i` or `j` whichever is nearest. - * midpoint: (`i` + `j`) / 2. - - engine : str, default None - * ``'cython'`` : Runs rolling quantile through C-extensions from cython. - * ``'numba'`` : Runs rolling quantile through JIT compiled code from numba. - * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` - - .. versionadded:: 1.3.0 - - engine_kwargs : dict, default None - * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` - * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` - and ``parallel`` dictionary keys. The values must either be ``True`` or - ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is - ``{'nopython': True, 'nogil': False, 'parallel': False}`` - - .. versionadded:: 1.3.0 - - **kwargs - For compatibility with other %(name)s methods. Has no effect on - the result. - - Returns - ------- - Series or DataFrame - Returned object type is determined by the caller of the %(name)s - calculation. - - See Also - -------- - pandas.Series.quantile : Computes value at the given quantile over all data - in Series. - pandas.DataFrame.quantile : Computes values at the given quantile over - requested axis in DataFrame. - - Examples - -------- - >>> s = pd.Series([1, 2, 3, 4]) - >>> s.rolling(2).quantile(.4, interpolation='lower') - 0 NaN - 1 1.0 - 2 2.0 - 3 3.0 - dtype: float64 - - >>> s.rolling(2).quantile(.4, interpolation='midpoint') - 0 NaN - 1 1.5 - 2 2.5 - 3 3.5 - dtype: float64 - """ - ) - def quantile(self, quantile: float, interpolation: str = "linear", **kwargs): if quantile == 1.0: window_func = window_aggregations.roll_max @@ -1807,30 +1204,6 @@ def quantile(self, quantile: float, interpolation: str = "linear", **kwargs): return self._apply(window_func, name="quantile", **kwargs) - _shared_docs[ - "cov" - ] = """ - Calculate the %(name)s sample covariance. - - Parameters - ---------- - other : Series, DataFrame, or ndarray, optional - If not supplied then will default to self and produce pairwise - output. - pairwise : bool, default None - If False then only matching columns between self and other will be - used and the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the - output will be a MultiIndexed DataFrame in the case of DataFrame - inputs. In the case of missing elements, only complete pairwise - observations will be used. - ddof : int, default 1 - Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - **kwargs - Keyword arguments to be passed into func. - """ - def cov(self, other=None, pairwise=None, ddof=1, **kwargs): if other is None: other = self._selected_obj @@ -1870,116 +1243,6 @@ def cov_func(x, y): self._selected_obj, other, cov_func, pairwise=bool(pairwise) ) - _shared_docs["corr"] = dedent( - """ - Calculate %(name)s correlation. - - Parameters - ---------- - other : Series, DataFrame, or ndarray, optional - If not supplied then will default to self. - pairwise : bool, default None - Calculate pairwise combinations of columns within a - DataFrame. If `other` is not specified, defaults to `True`, - otherwise defaults to `False`. - Not relevant for :class:`~pandas.Series`. - **kwargs - Unused. - - Returns - ------- - Series or DataFrame - Returned object type is determined by the caller of the - %(name)s calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with Series data. - pandas.DataFrame.%(name)s : Calling object with DataFrames. - pandas.Series.corr : Equivalent method for Series. - pandas.DataFrame.corr : Equivalent method for DataFrame. - cov : Similar method to calculate covariance. - numpy.corrcoef : NumPy Pearson's correlation calculation. - - Notes - ----- - This function uses Pearson's definition of correlation - (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). - - When `other` is not specified, the output will be self correlation (e.g. - all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` - set to `True`. - - Function will return ``NaN`` for correlations of equal valued sequences; - this is the result of a 0/0 division error. - - When `pairwise` is set to `False`, only matching columns between `self` and - `other` will be used. - - When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame - with the original index on the first level, and the `other` DataFrame - columns on the second level. - - In the case of missing elements, only complete pairwise observations - will be used. - - Examples - -------- - The below example shows a rolling calculation with a window size of - four matching the equivalent function call using :meth:`numpy.corrcoef`. - - >>> v1 = [3, 3, 3, 5, 8] - >>> v2 = [3, 4, 4, 4, 8] - >>> # numpy returns a 2X2 array, the correlation coefficient - >>> # is the number at entry [0][1] - >>> print(f"{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}") - 0.333333 - >>> print(f"{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}") - 0.916949 - >>> s1 = pd.Series(v1) - >>> s2 = pd.Series(v2) - >>> s1.rolling(4).corr(s2) - 0 NaN - 1 NaN - 2 NaN - 3 0.333333 - 4 0.916949 - dtype: float64 - - The below example shows a similar rolling calculation on a - DataFrame using the pairwise option. - - >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\ - [46., 31.], [50., 36.]]) - >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7)) - [[1. 0.6263001] - [0.6263001 1. ]] - >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7)) - [[1. 0.5553681] - [0.5553681 1. ]] - >>> df = pd.DataFrame(matrix, columns=['X','Y']) - >>> df - X Y - 0 51.0 35.0 - 1 49.0 30.0 - 2 47.0 32.0 - 3 46.0 31.0 - 4 50.0 36.0 - >>> df.rolling(4).corr(pairwise=True) - X Y - 0 X NaN NaN - Y NaN NaN - 1 X NaN NaN - Y NaN NaN - 2 X NaN NaN - Y NaN NaN - 3 X 1.000000 0.626300 - Y 0.626300 1.000000 - 4 X 1.000000 0.555368 - Y 0.555368 1.000000 - """ - ) - def corr(self, other=None, pairwise=None, ddof=1, **kwargs): if other is None: other = self._selected_obj @@ -2087,44 +1350,40 @@ def _raise_monotonic_error(self): formatted = "index" raise ValueError(f"{formatted} must be monotonic") - _agg_see_also_doc = dedent( - """ - See Also - -------- - pandas.Series.rolling : Calling object with Series data. - pandas.DataFrame.rolling : Calling object with DataFrame data. - """ - ) - - _agg_examples_doc = dedent( - """ - Examples - -------- - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - >>> df - A B C - 0 1 4 7 - 1 2 5 8 - 2 3 6 9 - - >>> df.rolling(2).sum() - A B C - 0 NaN NaN NaN - 1 3.0 9.0 15.0 - 2 5.0 11.0 17.0 - - >>> df.rolling(2).agg({"A": "sum", "B": "min"}) - A B - 0 NaN NaN - 1 3.0 4.0 - 2 5.0 5.0 - """ - ) - @doc( _shared_docs["aggregate"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, + see_also=dedent( + """ + See Also + -------- + pandas.Series.rolling : Calling object with Series data. + pandas.DataFrame.rolling : Calling object with DataFrame data. + """ + ), + examples=dedent( + """ + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.rolling(2).sum() + A B C + 0 NaN NaN NaN + 1 3.0 9.0 15.0 + 2 5.0 11.0 17.0 + + >>> df.rolling(2).agg({"A": "sum", "B": "min"}) + A B + 0 NaN NaN + 1 3.0 4.0 + 2 5.0 5.0 + """ + ), klass="Series/Dataframe", axis="", ) @@ -2133,8 +1392,40 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate - @Substitution(name="rolling") - @Appender(_shared_docs["count"]) + @doc( + template_header, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([2, 3, np.nan, 10]) + >>> s.rolling(2).count() + 0 1.0 + 1 2.0 + 2 1.0 + 3 1.0 + dtype: float64 + >>> s.rolling(3).count() + 0 1.0 + 1 2.0 + 2 2.0 + 3 2.0 + dtype: float64 + >>> s.rolling(4).count() + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="count of non NaN observations", + agg_method="count", + ) def count(self): if self.min_periods is None: warnings.warn( @@ -2148,8 +1439,18 @@ def count(self): self.min_periods = 0 return super().count() - @Substitution(name="rolling") - @Appender(_shared_docs["apply"]) + @doc( + template_header, + create_section_header("Parameters"), + window_apply_parameters, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="custom aggregation function", + agg_method="apply", + ) def apply( self, func, raw=False, engine=None, engine_kwargs=None, args=None, kwargs=None ): @@ -2162,92 +1463,444 @@ def apply( kwargs=kwargs, ) - @Substitution(name="rolling") - @Appender(_shared_docs["sum"]) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 2, 3, 4, 5]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: int64 + + >>> s.rolling(3).sum() + 0 NaN + 1 NaN + 2 6.0 + 3 9.0 + 4 12.0 + dtype: float64 + + >>> s.rolling(3, center=True).sum() + 0 NaN + 1 6.0 + 2 9.0 + 3 12.0 + 4 NaN + dtype: float64 + + For DataFrame, each sum is computed column-wise. + + >>> df = pd.DataFrame({{"A": s, "B": s ** 2}}) + >>> df + A B + 0 1 1 + 1 2 4 + 2 3 9 + 3 4 16 + 4 5 25 + + >>> df.rolling(3).sum() + A B + 0 NaN NaN + 1 NaN NaN + 2 6.0 14.0 + 3 9.0 29.0 + 4 12.0 50.0 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="sum", + agg_method="sum", + ) def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("sum", args, kwargs) return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="rolling", func_name="max") - @Appender(_doc_template) - @Appender(_shared_docs["max"]) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + window_method="rolling", + aggregation_description="maximum", + agg_method="max", + ) def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("max", args, kwargs) return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="rolling") - @Appender(_shared_docs["min"]) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + Performing a rolling minimum with a window size of 3. + + >>> s = pd.Series([4, 3, 5, 2, 6]) + >>> s.rolling(3).min() + 0 NaN + 1 NaN + 2 3.0 + 3 2.0 + 4 2.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="minimum", + agg_method="min", + ) def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("min", args, kwargs) return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="rolling") - @Appender(_shared_docs["mean"]) + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + The below examples will show rolling mean calculations with window sizes of + two and three, respectively. + + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.rolling(2).mean() + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + + >>> s.rolling(3).mean() + 0 NaN + 1 NaN + 2 2.0 + 3 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="mean", + agg_method="mean", + ) def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("mean", args, kwargs) return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="rolling") - @Appender(_shared_docs["median"]) + @doc( + template_header, + create_section_header("Parameters"), + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes, + create_section_header("Examples"), + dedent( + """ + Compute the rolling median of a series with a window size of 3. + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.rolling(3).median() + 0 NaN + 1 NaN + 2 1.0 + 3 2.0 + 4 3.0 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="median", + agg_method="median", + ) def median(self, engine=None, engine_kwargs=None, **kwargs): return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) - @Substitution(name="rolling", versionadded="") - @Appender(_shared_docs["std"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.std : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.std` is different + than the default ``ddof`` of 0 in :func:`numpy.std`. + + A minimum of one period is required for the rolling calculation. + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + >>> s.rolling(3).std() + 0 NaN + 1 NaN + 2 0.577350 + 3 1.000000 + 4 1.000000 + 5 1.154701 + 6 0.000000 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="standard deviation", + agg_method="std", + ) def std(self, ddof=1, *args, **kwargs): nv.validate_rolling_func("std", args, kwargs) return super().std(ddof=ddof, **kwargs) - @Substitution(name="rolling", versionadded="") - @Appender(_shared_docs["var"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "numpy.var : Equivalent method for NumPy array.\n", + template_see_also, + create_section_header("Notes"), + dedent( + """ + The default ``ddof`` of 1 used in :meth:`Series.var` is different + than the default ``ddof`` of 0 in :func:`numpy.var`. + + A minimum of one period is required for the rolling calculation. + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + >>> s.rolling(3).var() + 0 NaN + 1 NaN + 2 0.333333 + 3 1.000000 + 4 1.000000 + 5 1.333333 + 6 0.000000 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="variance", + agg_method="var", + ) def var(self, ddof=1, *args, **kwargs): nv.validate_rolling_func("var", args, kwargs) return super().var(ddof=ddof, **kwargs) - @Substitution(name="rolling", func_name="skew") - @Appender(_doc_template) - @Appender(_shared_docs["skew"]) + @doc( + template_header, + create_section_header("Parameters"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.skew : Third moment of a probability density.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of three periods is required for the rolling calculation.\n", + window_method="rolling", + aggregation_description="unbiased skewness", + agg_method="skew", + ) def skew(self, **kwargs): return super().skew(**kwargs) - @Substitution(name="rolling") - @Appender(_shared_docs["sem"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + args_compat, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + "A minimum of one period is required for the calculation.\n", + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([0, 1, 2, 3]) + >>> s.rolling(2, min_periods=1).sem() + 0 NaN + 1 0.707107 + 2 0.707107 + 3 0.707107 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="standard error of mean", + agg_method="sem", + ) def sem(self, ddof=1, *args, **kwargs): return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) - _agg_doc = dedent( + @doc( + template_header, + create_section_header("Parameters"), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + "scipy.stats.kurtosis : Reference SciPy method.\n", + template_see_also, + create_section_header("Notes"), + "A minimum of four periods is required for the calculation.\n", + create_section_header("Examples"), + dedent( + """ + The example below will show a rolling calculation with a window size of + four matching the equivalent function call using `scipy.stats`. + + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}") + -1.200000 + >>> print(f"{{scipy.stats.kurtosis(arr[1:], bias=False):.6f}}") + 3.999946 + >>> s = pd.Series(arr) + >>> s.rolling(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 3.999946 + dtype: float64 """ - Examples - -------- - - The example below will show a rolling calculation with a window size of - four matching the equivalent function call using `scipy.stats`. - - >>> arr = [1, 2, 3, 4, 999] - >>> import scipy.stats - >>> print(f"{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}") - -1.200000 - >>> print(f"{scipy.stats.kurtosis(arr[1:], bias=False):.6f}") - 3.999946 - >>> s = pd.Series(arr) - >>> s.rolling(4).kurt() - 0 NaN - 1 NaN - 2 NaN - 3 -1.200000 - 4 3.999946 - dtype: float64 - """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="Fisher's definition of kurtosis without bias", + agg_method="kurt", ) - - @Appender(_agg_doc) - @Substitution(name="rolling") - @Appender(_shared_docs["kurt"]) def kurt(self, **kwargs): return super().kurt(**kwargs) - @Substitution(name="rolling") - @Appender(_shared_docs["quantile"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + quantile : float + Quantile to compute. 0 <= quantile <= 1. + interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Examples"), + dedent( + """ + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.rolling(2).quantile(.4, interpolation='lower') + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + dtype: float64 + + >>> s.rolling(2).quantile(.4, interpolation='midpoint') + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="quantile", + agg_method="quantile", + ) def quantile(self, quantile, interpolation="linear", **kwargs): return super().quantile( quantile=quantile, @@ -2255,14 +1908,154 @@ def quantile(self, quantile, interpolation="linear", **kwargs): **kwargs, ) - @Substitution(name="rolling", func_name="cov") - @Appender(_doc_template) - @Appender(_shared_docs["cov"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also[:-1], + window_method="rolling", + aggregation_description="sample covariance", + agg_method="cov", + ) def cov(self, other=None, pairwise=None, ddof=1, **kwargs): return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) - @Substitution(name="rolling") - @Appender(_shared_docs["corr"]) + @doc( + template_header, + create_section_header("Parameters"), + dedent( + """ + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + """ + ).replace("\n", "", 1), + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + dedent( + """ + cov : Similar method to calculate covariance. + numpy.corrcoef : NumPy Pearson's correlation calculation. + """ + ).replace("\n", "", 1), + template_see_also, + create_section_header("Notes"), + dedent( + """ + This function uses Pearson's definition of correlation + (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). + + When `other` is not specified, the output will be self correlation (e.g. + all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` + set to `True`. + + Function will return ``NaN`` for correlations of equal valued sequences; + this is the result of a 0/0 division error. + + When `pairwise` is set to `False`, only matching columns between `self` and + `other` will be used. + + When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame + with the original index on the first level, and the `other` DataFrame + columns on the second level. + + In the case of missing elements, only complete pairwise observations + will be used. + """ + ).replace("\n", "", 1), + create_section_header("Examples"), + dedent( + """ + The below example shows a rolling calculation with a window size of + four matching the equivalent function call using :meth:`numpy.corrcoef`. + + >>> v1 = [3, 3, 3, 5, 8] + >>> v2 = [3, 4, 4, 4, 8] + >>> # numpy returns a 2X2 array, the correlation coefficient + >>> # is the number at entry [0][1] + >>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}") + 0.333333 + >>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}") + 0.916949 + >>> s1 = pd.Series(v1) + >>> s2 = pd.Series(v2) + >>> s1.rolling(4).corr(s2) + 0 NaN + 1 NaN + 2 NaN + 3 0.333333 + 4 0.916949 + dtype: float64 + + The below example shows a similar rolling calculation on a + DataFrame using the pairwise option. + + >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\ + [46., 31.], [50., 36.]]) + >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7)) + [[1. 0.6263001] + [0.6263001 1. ]] + >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7)) + [[1. 0.5553681] + [0.5553681 1. ]] + >>> df = pd.DataFrame(matrix, columns=['X','Y']) + >>> df + X Y + 0 51.0 35.0 + 1 49.0 30.0 + 2 47.0 32.0 + 3 46.0 31.0 + 4 50.0 36.0 + >>> df.rolling(4).corr(pairwise=True) + X Y + 0 X NaN NaN + Y NaN NaN + 1 X NaN NaN + Y NaN NaN + 2 X NaN NaN + Y NaN NaN + 3 X 1.000000 0.626300 + Y 0.626300 1.000000 + 4 X 1.000000 0.555368 + Y 0.555368 1.000000 + """ + ).replace("\n", "", 1), + window_method="rolling", + aggregation_description="correlation", + agg_method="corr", + ) def corr(self, other=None, pairwise=None, ddof=1, **kwargs): return super().corr(other=other, pairwise=pairwise, ddof=ddof, **kwargs)