From db2f86d3078e5034b61896e07a18a828344ca78c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 30 Dec 2020 15:36:14 -0800 Subject: [PATCH 01/18] Add engine arguments to methods that support numpy nan methods --- pandas/core/window/expanding.py | 47 ++++++++++++++-------- pandas/core/window/rolling.py | 70 ++++++++++++++++++++++----------- 2 files changed, 77 insertions(+), 40 deletions(-) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 81aa6699c3c61..8dacb193c423b 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -172,45 +172,49 @@ def apply( @Substitution(name="expanding") @Appender(_shared_docs["sum"]) - def sum(self, *args, **kwargs): + def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("sum", args, kwargs) - return super().sum(*args, **kwargs) + return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding", func_name="max") @Appender(_doc_template) @Appender(_shared_docs["max"]) - def max(self, *args, **kwargs): + def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("max", args, kwargs) - return super().max(*args, **kwargs) + return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding") @Appender(_shared_docs["min"]) - def min(self, *args, **kwargs): + def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("min", args, kwargs) - return super().min(*args, **kwargs) + return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding") @Appender(_shared_docs["mean"]) - def mean(self, *args, **kwargs): + def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("mean", args, kwargs) - return super().mean(*args, **kwargs) + return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding") @Appender(_shared_docs["median"]) - def median(self, **kwargs): - return super().median(**kwargs) + def median(self, engine=None, engine_kwargs=None, **kwargs): + return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="expanding", versionadded="") @Appender(_shared_docs["std"]) - def std(self, ddof: int = 1, *args, **kwargs): + def std(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): nv.validate_expanding_func("std", args, kwargs) - return super().std(ddof=ddof, **kwargs) + return super().std( + ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) @Substitution(name="expanding", versionadded="") @Appender(_shared_docs["var"]) - def var(self, ddof: int = 1, *args, **kwargs): + def var(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): nv.validate_expanding_func("var", args, kwargs) - return super().var(ddof=ddof, **kwargs) + return super().var( + ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) @Substitution(name="expanding") @Appender(_shared_docs["sem"]) @@ -256,9 +260,20 @@ def kurt(self, **kwargs): @Substitution(name="expanding") @Appender(_shared_docs["quantile"]) - def quantile(self, quantile, interpolation="linear", **kwargs): + def quantile( + self, + quantile, + interpolation="linear", + engine=None, + engine_kwargs=None, + **kwargs, + ): return super().quantile( - quantile=quantile, interpolation=interpolation, **kwargs + quantile=quantile, + interpolation=interpolation, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, ) @Substitution(name="expanding", func_name="cov") diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index db8a48300206b..d99b79c9b804c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1351,7 +1351,7 @@ def apply_func(values, begin, end, min_periods, raw=raw): return apply_func - def sum(self, *args, **kwargs): + def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("sum", args, kwargs) window_func = window_aggregations.roll_sum return self._apply(window_func, name="sum", **kwargs) @@ -1367,7 +1367,7 @@ def sum(self, *args, **kwargs): """ ) - def max(self, *args, **kwargs): + def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("max", args, kwargs) window_func = window_aggregations.roll_max return self._apply(window_func, name="max", **kwargs) @@ -1409,12 +1409,12 @@ def max(self, *args, **kwargs): """ ) - def min(self, *args, **kwargs): + def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("min", args, kwargs) window_func = window_aggregations.roll_min return self._apply(window_func, name="min", **kwargs) - def mean(self, *args, **kwargs): + def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func = window_aggregations.roll_mean return self._apply(window_func, name="mean", **kwargs) @@ -1456,13 +1456,13 @@ def mean(self, *args, **kwargs): """ ) - def median(self, **kwargs): + def median(self, engine=None, engine_kwargs=None, **kwargs): window_func = window_aggregations.roll_median_c # GH 32865. Move max window size calculation to # the median function implementation return self._apply(window_func, name="median", **kwargs) - def std(self, ddof: int = 1, *args, **kwargs): + def std(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): nv.validate_window_func("std", args, kwargs) window_func = window_aggregations.roll_var @@ -1475,7 +1475,7 @@ def zsqrt_func(values, begin, end, min_periods): **kwargs, ) - def var(self, ddof: int = 1, *args, **kwargs): + def var(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): nv.validate_window_func("var", args, kwargs) window_func = partial(window_aggregations.roll_var, ddof=ddof) return self._apply( @@ -1650,7 +1650,14 @@ def kurt(self, **kwargs): """ ) - def quantile(self, quantile: float, interpolation: str = "linear", **kwargs): + def quantile( + self, + quantile: float, + interpolation: str = "linear", + engine=None, + engine_kwargs=None, + **kwargs, + ): if quantile == 1.0: window_func = window_aggregations.roll_max elif quantile == 0.0: @@ -1995,45 +2002,49 @@ def apply( @Substitution(name="rolling") @Appender(_shared_docs["sum"]) - def sum(self, *args, **kwargs): + def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("sum", args, kwargs) - return super().sum(*args, **kwargs) + return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling", func_name="max") @Appender(_doc_template) @Appender(_shared_docs["max"]) - def max(self, *args, **kwargs): + def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("max", args, kwargs) - return super().max(*args, **kwargs) + return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling") @Appender(_shared_docs["min"]) - def min(self, *args, **kwargs): + def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("min", args, kwargs) - return super().min(*args, **kwargs) + return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling") @Appender(_shared_docs["mean"]) - def mean(self, *args, **kwargs): + def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("mean", args, kwargs) - return super().mean(*args, **kwargs) + return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling") @Appender(_shared_docs["median"]) - def median(self, **kwargs): - return super().median(**kwargs) + def median(self, engine=None, engine_kwargs=None, **kwargs): + return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs) @Substitution(name="rolling", versionadded="") @Appender(_shared_docs["std"]) - def std(self, ddof=1, *args, **kwargs): + def std(self, ddof=1, engine=None, engine_kwargs=None, *args, **kwargs): nv.validate_rolling_func("std", args, kwargs) - return super().std(ddof=ddof, **kwargs) + return super().std( + ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) @Substitution(name="rolling", versionadded="") @Appender(_shared_docs["var"]) - def var(self, ddof=1, *args, **kwargs): + def var(self, ddof=1, engine=None, engine_kwargs=None, *args, **kwargs): nv.validate_rolling_func("var", args, kwargs) - return super().var(ddof=ddof, **kwargs) + return super().var( + ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) @Substitution(name="rolling", func_name="skew") @Appender(_doc_template) @@ -2079,9 +2090,20 @@ def kurt(self, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["quantile"]) - def quantile(self, quantile, interpolation="linear", **kwargs): + def quantile( + self, + quantile, + interpolation="linear", + engine=None, + engine_kwargs=None, + **kwargs, + ): return super().quantile( - quantile=quantile, interpolation=interpolation, **kwargs + quantile=quantile, + interpolation=interpolation, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, ) @Substitution(name="rolling", func_name="cov") From 8a7bbed0de207a4307627a5e187cd2848e5a07ba Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 30 Dec 2020 20:27:50 -0800 Subject: [PATCH 02/18] Route arguments through apply --- pandas/core/window/rolling.py | 43 +++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d99b79c9b804c..6998586936bdf 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1353,6 +1353,10 @@ def apply_func(values, begin, end, min_periods, raw=raw): def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("sum", args, kwargs) + if maybe_use_numba(engine): + return self.apply( + np.nansum, raw=True, engine=engine, engine_kwargs=engine_kwargs + ) window_func = window_aggregations.roll_sum return self._apply(window_func, name="sum", **kwargs) @@ -1369,6 +1373,10 @@ def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("max", args, kwargs) + if maybe_use_numba(engine): + return self.apply( + np.nanmax, raw=True, engine=engine, engine_kwargs=engine_kwargs + ) window_func = window_aggregations.roll_max return self._apply(window_func, name="max", **kwargs) @@ -1411,11 +1419,19 @@ def max(self, *args, engine=None, engine_kwargs=None, **kwargs): def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("min", args, kwargs) + if maybe_use_numba(engine): + return self.apply( + np.nanmin, raw=True, engine=engine, engine_kwargs=engine_kwargs + ) window_func = window_aggregations.roll_min return self._apply(window_func, name="min", **kwargs) def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("mean", args, kwargs) + if maybe_use_numba(engine): + return self.apply( + np.nanmean, raw=True, engine=engine, engine_kwargs=engine_kwargs + ) window_func = window_aggregations.roll_mean return self._apply(window_func, name="mean", **kwargs) @@ -1458,12 +1474,21 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): def median(self, engine=None, engine_kwargs=None, **kwargs): window_func = window_aggregations.roll_median_c - # GH 32865. Move max window size calculation to - # the median function implementation + if maybe_use_numba(engine): + return self.apply( + np.nanmedian, raw=True, engine=engine, engine_kwargs=engine_kwargs + ) return self._apply(window_func, name="median", **kwargs) def std(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): nv.validate_window_func("std", args, kwargs) + if maybe_use_numba(engine): + return self.apply( + partial(np.nanstd, ddof=ddof), + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) window_func = window_aggregations.roll_var def zsqrt_func(values, begin, end, min_periods): @@ -1477,6 +1502,13 @@ def zsqrt_func(values, begin, end, min_periods): def var(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): nv.validate_window_func("var", args, kwargs) + if maybe_use_numba(engine): + return self.apply( + partial(np.nanvar, ddof=ddof), + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) window_func = partial(window_aggregations.roll_var, ddof=ddof) return self._apply( window_func, @@ -1658,6 +1690,13 @@ def quantile( engine_kwargs=None, **kwargs, ): + if maybe_use_numba(engine): + return self.apply( + partial(np.nanquantile, q=quantile, interpolation=interpolation), + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + ) if quantile == 1.0: window_func = window_aggregations.roll_max elif quantile == 0.0: From 10dd6aa189b7669fc2e934842cfafc59841dc3a9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 31 Dec 2020 14:51:41 -0800 Subject: [PATCH 03/18] Add docs --- pandas/core/window/rolling.py | 96 ++++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6998586936bdf..8fcc5b8f13795 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1241,6 +1241,7 @@ def count(self): objects instead. If you are just applying a NumPy reduction function this will achieve much better performance. + engine : str, default None * ``'cython'`` : Runs rolling apply through C-extensions from cython. * ``'numba'`` : Runs rolling apply through JIT compiled code from numba. @@ -1366,8 +1367,38 @@ def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): Parameters ---------- - *args, **kwargs - Arguments and keyword arguments to be passed into func. + engine : str, default None + * ``'cython'`` : Runs rolling max through C-extensions from cython. + * ``'numba'`` : Runs rolling max through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` + + .. versionadded:: 1.3.0 + + **kwargs + For compatibility with other %(name)s methods. Has no effect on + the result. + + Returns + ------- + Series or DataFrame + Returned object type is determined by the caller of the %(name)s + calculation. + + See Also + -------- + pandas.Series.%(name)s : Calling object with a Series. + pandas.DataFrame.%(name)s : Calling object with a DataFrame. + pandas.Series.max : Similar method for Series. + pandas.DataFrame.max : Similar method for DataFrame. """ ) @@ -1386,8 +1417,25 @@ def max(self, *args, engine=None, engine_kwargs=None, **kwargs): Parameters ---------- + engine : str, default None + * ``'cython'`` : Runs rolling min through C-extensions from cython. + * ``'numba'`` : Runs rolling min through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` + + .. versionadded:: 1.3.0 + **kwargs - Under Review. + For compatibility with other %(name)s methods. Has no effect on + the result. Returns ------- @@ -1441,9 +1489,25 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): Parameters ---------- + engine : str, default None + * ``'cython'`` : Runs rolling median through C-extensions from cython. + * ``'numba'`` : Runs rolling median through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` + + .. versionadded:: 1.3.0 + **kwargs For compatibility with other %(name)s methods. Has no effect - on the computed median. + on the computed result. Returns ------- @@ -1524,7 +1588,8 @@ def var(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): Parameters ---------- **kwargs - Keyword arguments to be passed into func. + For compatibility with other %(name)s methods. Has no effect on + the result. """ def skew(self, **kwargs): @@ -1544,7 +1609,8 @@ def skew(self, **kwargs): Parameters ---------- **kwargs - Under Review. + For compatibility with other %(name)s methods. Has no effect on + the result. Returns ------- @@ -1636,6 +1702,7 @@ def kurt(self, **kwargs): ---------- quantile : float Quantile to compute. 0 <= quantile <= 1. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to use, when the desired quantile lies between two data points `i` and `j`: @@ -1646,6 +1713,23 @@ def kurt(self, **kwargs): * higher: `j`. * nearest: `i` or `j` whichever is nearest. * midpoint: (`i` + `j`) / 2. + + engine : str, default None + * ``'cython'`` : Runs rolling quantile through C-extensions from cython. + * ``'numba'`` : Runs rolling quantile through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.3.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` + + .. versionadded:: 1.3.0 + **kwargs For compatibility with other %(name)s methods. Has no effect on the result. From 65607fc25ef7391a31a02a1271ef8eefbacb0d97 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 15:03:20 -0800 Subject: [PATCH 04/18] Correct std and var signature. Add sem to fixture --- pandas/core/window/expanding.py | 4 ++-- pandas/core/window/rolling.py | 8 ++++---- pandas/tests/window/conftest.py | 1 + pandas/tests/window/test_numba.py | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 8dacb193c423b..314f03ec591ae 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -202,7 +202,7 @@ def median(self, engine=None, engine_kwargs=None, **kwargs): @Substitution(name="expanding", versionadded="") @Appender(_shared_docs["std"]) - def std(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): + def std(self, ddof: int = 1, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("std", args, kwargs) return super().std( ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs @@ -210,7 +210,7 @@ def std(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): @Substitution(name="expanding", versionadded="") @Appender(_shared_docs["var"]) - def var(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): + def var(self, ddof: int = 1, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_expanding_func("var", args, kwargs) return super().var( ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 8fcc5b8f13795..ccf37447a16f0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1544,7 +1544,7 @@ def median(self, engine=None, engine_kwargs=None, **kwargs): ) return self._apply(window_func, name="median", **kwargs) - def std(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): + def std(self, ddof: int = 1, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("std", args, kwargs) if maybe_use_numba(engine): return self.apply( @@ -1564,7 +1564,7 @@ def zsqrt_func(values, begin, end, min_periods): **kwargs, ) - def var(self, ddof: int = 1, engine=None, engine_kwargs=None, *args, **kwargs): + def var(self, ddof: int = 1, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("var", args, kwargs) if maybe_use_numba(engine): return self.apply( @@ -2155,7 +2155,7 @@ def median(self, engine=None, engine_kwargs=None, **kwargs): @Substitution(name="rolling", versionadded="") @Appender(_shared_docs["std"]) - def std(self, ddof=1, engine=None, engine_kwargs=None, *args, **kwargs): + def std(self, ddof=1, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("std", args, kwargs) return super().std( ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs @@ -2163,7 +2163,7 @@ def std(self, ddof=1, engine=None, engine_kwargs=None, *args, **kwargs): @Substitution(name="rolling", versionadded="") @Appender(_shared_docs["var"]) - def var(self, ddof=1, engine=None, engine_kwargs=None, *args, **kwargs): + def var(self, ddof=1, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_rolling_func("var", args, kwargs) return super().var( ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index a765f268cfb07..bcebbe501812b 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -47,6 +47,7 @@ def win_types_special(request): "kurt", "skew", "count", + "sem", ] ) def arithmetic_win_operators(request): diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 4d22495e6c69a..b518daf9dba18 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -138,7 +138,7 @@ def f(x): f, engine="numba", raw=True ) - def test_table_method_rolling(self, axis, nogil, parallel, nopython): + def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): @@ -173,7 +173,7 @@ def weighted_mean(x): ) tm.assert_frame_equal(result, expected) - def test_table_method_expanding(self, axis, nogil, parallel, nopython): + def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): From 2ff5fe2e72cd61070ce2e7425299eb2e6fd55aa8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 16:46:00 -0800 Subject: [PATCH 05/18] realized numba does not support axis or ddof arguments in np.nan* functions --- pandas/core/window/rolling.py | 85 ++++++++++++++++++++----------- pandas/tests/window/test_numba.py | 56 ++++++++++++++++++++ 2 files changed, 112 insertions(+), 29 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ccf37447a16f0..11fcbb7f7f5f0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1355,8 +1355,17 @@ def apply_func(values, begin, end, min_periods, raw=raw): def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("sum", args, kwargs) if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nansum with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) return self.apply( - np.nansum, raw=True, engine=engine, engine_kwargs=engine_kwargs + np.nansum, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, ) window_func = window_aggregations.roll_sum return self._apply(window_func, name="sum", **kwargs) @@ -1405,8 +1414,17 @@ def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("max", args, kwargs) if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nanmax with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) return self.apply( - np.nanmax, raw=True, engine=engine, engine_kwargs=engine_kwargs + np.nanmax, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, ) window_func = window_aggregations.roll_max return self._apply(window_func, name="max", **kwargs) @@ -1468,8 +1486,17 @@ def max(self, *args, engine=None, engine_kwargs=None, **kwargs): def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("min", args, kwargs) if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nanmin with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) return self.apply( - np.nanmin, raw=True, engine=engine, engine_kwargs=engine_kwargs + np.nanmin, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, ) window_func = window_aggregations.roll_min return self._apply(window_func, name="min", **kwargs) @@ -1477,8 +1504,17 @@ def min(self, *args, engine=None, engine_kwargs=None, **kwargs): def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("mean", args, kwargs) if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nanmean with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) return self.apply( - np.nanmean, raw=True, engine=engine, engine_kwargs=engine_kwargs + np.nanmean, + raw=True, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, ) window_func = window_aggregations.roll_mean return self._apply(window_func, name="mean", **kwargs) @@ -1539,20 +1575,22 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): def median(self, engine=None, engine_kwargs=None, **kwargs): window_func = window_aggregations.roll_median_c if maybe_use_numba(engine): + if self.method == "table": + raise NotImplementedError("method='table' is not supported.") + # Once numba supports np.nanmedian with axis, args will be relevant. + # https://github.com/numba/numba/issues/6610 + args = () if self.method == "single" else (0,) return self.apply( - np.nanmedian, raw=True, engine=engine, engine_kwargs=engine_kwargs - ) - return self._apply(window_func, name="median", **kwargs) - - def std(self, ddof: int = 1, *args, engine=None, engine_kwargs=None, **kwargs): - nv.validate_window_func("std", args, kwargs) - if maybe_use_numba(engine): - return self.apply( - partial(np.nanstd, ddof=ddof), + np.nanmedian, raw=True, engine=engine, engine_kwargs=engine_kwargs, + args=args, ) + return self._apply(window_func, name="median", **kwargs) + + def std(self, ddof: int = 1, *args, **kwargs): + nv.validate_window_func("std", args, kwargs) window_func = window_aggregations.roll_var def zsqrt_func(values, begin, end, min_periods): @@ -1564,15 +1602,8 @@ def zsqrt_func(values, begin, end, min_periods): **kwargs, ) - def var(self, ddof: int = 1, *args, engine=None, engine_kwargs=None, **kwargs): + def var(self, ddof: int = 1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) - if maybe_use_numba(engine): - return self.apply( - partial(np.nanvar, ddof=ddof), - raw=True, - engine=engine, - engine_kwargs=engine_kwargs, - ) window_func = partial(window_aggregations.roll_var, ddof=ddof) return self._apply( window_func, @@ -2155,19 +2186,15 @@ def median(self, engine=None, engine_kwargs=None, **kwargs): @Substitution(name="rolling", versionadded="") @Appender(_shared_docs["std"]) - def std(self, ddof=1, *args, engine=None, engine_kwargs=None, **kwargs): + def std(self, ddof=1, *args, **kwargs): nv.validate_rolling_func("std", args, kwargs) - return super().std( - ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs - ) + return super().std(ddof=ddof, **kwargs) @Substitution(name="rolling", versionadded="") @Appender(_shared_docs["var"]) - def var(self, ddof=1, *args, engine=None, engine_kwargs=None, **kwargs): + def var(self, ddof=1, *args, **kwargs): nv.validate_rolling_func("var", args, kwargs) - return super().var( - ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs - ) + return super().var(ddof=ddof, **kwargs) @Substitution(name="rolling", func_name="skew") @Appender(_doc_template) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index b518daf9dba18..8fba138d88095 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -138,6 +138,34 @@ def f(x): f, engine="numba", raw=True ) + def test_table_method_rolling_methods( + self, axis, nogil, parallel, nopython, arithmetic_win_operators + ): + method = arithmetic_win_operators + if method in ("kurt", "skew", "count", "sem", "var", "std"): + pytest.skip(f"{method} does not support numba engine.") + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(3)) + + with pytest.raises( + NotImplementedError, match="method='table' is not supported." + ): + getattr(df.rolling(2, method="table", axis=axis, min_periods=0), method)( + engine_kwargs=engine_kwargs, engine="numba" + ) + + # Once method='table' is supported, uncomment test below. + # + # result = getattr( + # df.rolling(2, method="table", axis=axis, min_periods=0), method + # )(engine_kwargs=engine_kwargs, engine="numba") + # expected = getattr( + # df.rolling(2, method="single", axis=axis, min_periods=0), method + # )(engine_kwargs=engine_kwargs, engine="numba") + # tm.assert_frame_equal(result, expected) + def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} @@ -187,3 +215,31 @@ def f(x): f, raw=True, engine_kwargs=engine_kwargs, engine="numba" ) tm.assert_frame_equal(result, expected) + + def test_table_method_expanding_methods( + self, axis, nogil, parallel, nopython, arithmetic_win_operators + ): + method = arithmetic_win_operators + if method in ("kurt", "skew", "count", "sem", "var", "std"): + pytest.skip(f"{method} does not support numba engine.") + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(3)) + + with pytest.raises( + NotImplementedError, match="method='table' is not supported." + ): + getattr(df.expanding(method="table", axis=axis), method)( + engine_kwargs=engine_kwargs, engine="numba" + ) + + # Once method='table' is supported, uncomment test below. + # + # result = getattr( + # df.rolling(method="table", axis=axis), method + # )(engine_kwargs=engine_kwargs, engine="numba") + # expected = getattr( + # df.rolling(method="single", axis=axis), method + # )(engine_kwargs=engine_kwargs, engine="numba") + # tm.assert_frame_equal(result, expected) From eaee1cc87da3b234a0a28aa29c4362fb53a4d33c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 16:47:14 -0800 Subject: [PATCH 06/18] Move median func below --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 11fcbb7f7f5f0..95636e7101eca 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1573,7 +1573,6 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): ) def median(self, engine=None, engine_kwargs=None, **kwargs): - window_func = window_aggregations.roll_median_c if maybe_use_numba(engine): if self.method == "table": raise NotImplementedError("method='table' is not supported.") @@ -1587,6 +1586,7 @@ def median(self, engine=None, engine_kwargs=None, **kwargs): engine_kwargs=engine_kwargs, args=args, ) + window_func = window_aggregations.roll_median_c return self._apply(window_func, name="median", **kwargs) def std(self, ddof: int = 1, *args, **kwargs): From cce6181167d3e4d3070185f99999330daadc9301 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 16:53:31 -0800 Subject: [PATCH 07/18] fix commented code --- pandas/tests/window/test_numba.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 8fba138d88095..0cc15ebed812f 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -237,9 +237,9 @@ def test_table_method_expanding_methods( # Once method='table' is supported, uncomment test below. # # result = getattr( - # df.rolling(method="table", axis=axis), method + # df.expanding(method="table", axis=axis), method # )(engine_kwargs=engine_kwargs, engine="numba") # expected = getattr( - # df.rolling(method="single", axis=axis), method + # df.expanding(method="single", axis=axis), method # )(engine_kwargs=engine_kwargs, engine="numba") # tm.assert_frame_equal(result, expected) From 7edd1409468b3b4cd5fe4458122a91274c40c242 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 16:58:52 -0800 Subject: [PATCH 08/18] Remove numba engine from quantile --- pandas/core/window/expanding.py | 16 ++++------------ pandas/core/window/rolling.py | 9 --------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 314f03ec591ae..1f0c16fb5aa8f 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -202,19 +202,15 @@ def median(self, engine=None, engine_kwargs=None, **kwargs): @Substitution(name="expanding", versionadded="") @Appender(_shared_docs["std"]) - def std(self, ddof: int = 1, *args, engine=None, engine_kwargs=None, **kwargs): + def std(self, ddof: int = 1, *args, **kwargs): nv.validate_expanding_func("std", args, kwargs) - return super().std( - ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs - ) + return super().std(ddof=ddof, **kwargs) @Substitution(name="expanding", versionadded="") @Appender(_shared_docs["var"]) - def var(self, ddof: int = 1, *args, engine=None, engine_kwargs=None, **kwargs): + def var(self, ddof: int = 1, *args, **kwargs): nv.validate_expanding_func("var", args, kwargs) - return super().var( - ddof=ddof, engine=engine, engine_kwargs=engine_kwargs, **kwargs - ) + return super().var(ddof=ddof, **kwargs) @Substitution(name="expanding") @Appender(_shared_docs["sem"]) @@ -264,15 +260,11 @@ def quantile( self, quantile, interpolation="linear", - engine=None, - engine_kwargs=None, **kwargs, ): return super().quantile( quantile=quantile, interpolation=interpolation, - engine=engine, - engine_kwargs=engine_kwargs, **kwargs, ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 95636e7101eca..ec7dcc02edb5c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1801,17 +1801,8 @@ def quantile( self, quantile: float, interpolation: str = "linear", - engine=None, - engine_kwargs=None, **kwargs, ): - if maybe_use_numba(engine): - return self.apply( - partial(np.nanquantile, q=quantile, interpolation=interpolation), - raw=True, - engine=engine, - engine_kwargs=engine_kwargs, - ) if quantile == 1.0: window_func = window_aggregations.roll_max elif quantile == 0.0: From 344485fa30936639d262a924c80b322e62847d45 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 17:04:49 -0800 Subject: [PATCH 09/18] Remove other arguments from quantile --- pandas/core/window/rolling.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ec7dcc02edb5c..331cefe98729c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1797,12 +1797,7 @@ def kurt(self, **kwargs): """ ) - def quantile( - self, - quantile: float, - interpolation: str = "linear", - **kwargs, - ): + def quantile(self, quantile: float, interpolation: str = "linear", **kwargs): if quantile == 1.0: window_func = window_aggregations.roll_max elif quantile == 0.0: @@ -2231,19 +2226,10 @@ def kurt(self, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["quantile"]) - def quantile( - self, - quantile, - interpolation="linear", - engine=None, - engine_kwargs=None, - **kwargs, - ): + def quantile(self, quantile, interpolation="linear", **kwargs): return super().quantile( quantile=quantile, interpolation=interpolation, - engine=engine, - engine_kwargs=engine_kwargs, **kwargs, ) From 4ce2a78bb05445a0cecfb7a3adc5fa0e6981380c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 17:27:24 -0800 Subject: [PATCH 10/18] Add numba engine tests single method test --- pandas/tests/window/conftest.py | 13 +++++++ pandas/tests/window/test_numba.py | 57 +++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index bcebbe501812b..70bead489d2c6 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -54,6 +54,19 @@ def arithmetic_win_operators(request): return request.param +@pytest.fixture( + params=[ + "sum", + "mean", + "median", + "max", + "min", + ] +) +def arithmetic_numba_supported_operators(request): + return request.param + + @pytest.fixture(params=["right", "left", "both", "neither"]) def closed(request): return request.param diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 0cc15ebed812f..39a81e6d7e012 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -12,9 +12,9 @@ @td.skip_if_no("numba", "0.46.0") @pytest.mark.filterwarnings("ignore:\\nThe keyword argument") # Filter warnings when parallel=True and the function can't be parallelized by Numba -class TestRollingApply: +class TestEngine: @pytest.mark.parametrize("jit", [True, False]) - def test_numba_vs_cython(self, jit, nogil, parallel, nopython, center): + def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center): def f(x, *args): arg_sum = 0 for arg in args: @@ -38,8 +38,47 @@ def f(x, *args): ) tm.assert_series_equal(result, expected) + def test_numba_vs_cython_rolling_methods( + self, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + + method = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(5)) + roll = df.rolling(2) + result = getattr(roll, method)(engine="numba", engine_kwargs=engine_kwargs) + expected = getattr(roll, method)(engine="cython") + + # Check the cache + assert (getattr(np, f"nan{method}"), "Rolling_apply_single") in NUMBA_FUNC_CACHE + + tm.assert_series_equal(result, expected) + + def test_numba_vs_cython_expanding_methods( + self, nogil, parallel, nopython, arithmetic_numba_supported_operators + ): + + method = arithmetic_numba_supported_operators + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + df = DataFrame(np.eye(5)) + expand = df.expanding() + result = getattr(expand, method)(engine="numba", engine_kwargs=engine_kwargs) + expected = getattr(expand, method)(engine="cython") + + # Check the cache + assert ( + getattr(np, f"nan{method}"), + "Expanding_apply_single", + ) in NUMBA_FUNC_CACHE + + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("jit", [True, False]) - def test_cache(self, jit, nogil, parallel, nopython): + def test_cache_apply(self, jit, nogil, parallel, nopython): # Test that the functions are cached correctly if we switch functions def func_1(x): return np.mean(x) + 4 @@ -139,11 +178,9 @@ def f(x): ) def test_table_method_rolling_methods( - self, axis, nogil, parallel, nopython, arithmetic_win_operators + self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators ): - method = arithmetic_win_operators - if method in ("kurt", "skew", "count", "sem", "var", "std"): - pytest.skip(f"{method} does not support numba engine.") + method = arithmetic_numba_supported_operators engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} @@ -217,11 +254,9 @@ def f(x): tm.assert_frame_equal(result, expected) def test_table_method_expanding_methods( - self, axis, nogil, parallel, nopython, arithmetic_win_operators + self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators ): - method = arithmetic_win_operators - if method in ("kurt", "skew", "count", "sem", "var", "std"): - pytest.skip(f"{method} does not support numba engine.") + method = arithmetic_numba_supported_operators engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} From 353fab6633988a2b52bc213388ce43285d9d8752 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 17:58:39 -0800 Subject: [PATCH 11/18] Change to assert_series_equal --- pandas/tests/window/test_numba.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 39a81e6d7e012..7f440374372b2 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -54,7 +54,7 @@ def test_numba_vs_cython_rolling_methods( # Check the cache assert (getattr(np, f"nan{method}"), "Rolling_apply_single") in NUMBA_FUNC_CACHE - tm.assert_series_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_numba_vs_cython_expanding_methods( self, nogil, parallel, nopython, arithmetic_numba_supported_operators @@ -75,7 +75,7 @@ def test_numba_vs_cython_expanding_methods( "Expanding_apply_single", ) in NUMBA_FUNC_CACHE - tm.assert_series_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("jit", [True, False]) def test_cache_apply(self, jit, nogil, parallel, nopython): From 33e055249324b0cd4e5cbeaafa7ccb8690ea2bf2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 18:04:10 -0800 Subject: [PATCH 12/18] Add whatsnew note --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 049c4fe653107..a6088d4b25393 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -51,6 +51,7 @@ Other enhancements - :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`) - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`) +- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:``) .. --------------------------------------------------------------------------- From a56fe92cbd1929baef7e0c237efcb6ae8a775efe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 18:07:17 -0800 Subject: [PATCH 13/18] Add PR number --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index a6088d4b25393..cdcde9739d0f2 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -51,7 +51,7 @@ Other enhancements - :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`) - Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`) - :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`) -- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:``) +- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`) .. --------------------------------------------------------------------------- From 6bc13335b1bce8080f14cbd68745d7798e81746a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Jan 2021 18:26:15 -0800 Subject: [PATCH 14/18] Remove redundant doc section --- pandas/core/window/rolling.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 331cefe98729c..7ae1e61d426b9 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1395,19 +1395,6 @@ def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): **kwargs For compatibility with other %(name)s methods. Has no effect on the result. - - Returns - ------- - Series or DataFrame - Returned object type is determined by the caller of the %(name)s - calculation. - - See Also - -------- - pandas.Series.%(name)s : Calling object with a Series. - pandas.DataFrame.%(name)s : Calling object with a DataFrame. - pandas.Series.max : Similar method for Series. - pandas.DataFrame.max : Similar method for DataFrame. """ ) From 178543b3b6e2f9b789dd62f1200ca4e5ff816269 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 3 Jan 2021 22:36:31 -0800 Subject: [PATCH 15/18] Add ASV benchmarks --- asv_bench/benchmarks/rolling.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 306083e9c22b2..d0b61bc03a43a 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -50,20 +50,24 @@ class Engine: ["int", "float"], [np.sum, lambda x: np.sum(x) + 5], ["cython", "numba"], + ["sum", "max", "min", "median", "mean"], ) - param_names = ["constructor", "dtype", "function", "engine"] + param_names = ["constructor", "dtype", "function", "engine", "method"] - def setup(self, constructor, dtype, function, engine): + def setup(self, constructor, dtype, function, engine, method): N = 10 ** 3 arr = (100 * np.random.random(N)).astype(dtype) self.data = getattr(pd, constructor)(arr) - def time_rolling_apply(self, constructor, dtype, function, engine): + def time_rolling_apply(self, constructor, dtype, function, engine, method): self.data.rolling(10).apply(function, raw=True, engine=engine) - def time_expanding_apply(self, constructor, dtype, function, engine): + def time_expanding_apply(self, constructor, dtype, function, engine, method): self.data.expanding().apply(function, raw=True, engine=engine) + def time_rolling_methods(self, constructor, dtype, function, engine, method): + getattr(self.data.rolling(10), method)() + class ExpandingMethods: From d8582dd0f7daab1a9faed58daec28056af34491b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 3 Jan 2021 22:37:03 -0800 Subject: [PATCH 16/18] Add engine arg --- asv_bench/benchmarks/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index d0b61bc03a43a..5f8cdb2a0bdac 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -66,7 +66,7 @@ def time_expanding_apply(self, constructor, dtype, function, engine, method): self.data.expanding().apply(function, raw=True, engine=engine) def time_rolling_methods(self, constructor, dtype, function, engine, method): - getattr(self.data.rolling(10), method)() + getattr(self.data.rolling(10), method)(engine=engine) class ExpandingMethods: From 49bae51a41fd9a3d971ac4d277bb4bcd1866eeb2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 3 Jan 2021 23:19:52 -0800 Subject: [PATCH 17/18] Add note in user_guide --- doc/source/user_guide/window.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 08641bc5b17ae..9db4a4bb873bd 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -321,6 +321,10 @@ Numba will be applied in potentially two routines: #. If ``func`` is a standard Python function, the engine will `JIT `__ the passed function. ``func`` can also be a JITed function in which case the engine will not JIT the function again. #. The engine will JIT the for loop where the apply function is applied to each window. +.. versionadded:: 1.3 + +``mean``, ``median``, ``max``, ``min``, and ``sum`` also support the ``engine`` and ``engine_kwargs`` arguments. + The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the `numba.jit decorator `__. These keyword arguments will be applied to *both* the passed function (if a standard Python function) From fc4656c7580b35762a1c308de6b57e128cae4ce8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 4 Jan 2021 11:33:39 -0800 Subject: [PATCH 18/18] xfail instead of comment out --- pandas/tests/window/test_numba.py | 52 ++++++++++++------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 7f440374372b2..9d9c216801d73 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -177,6 +177,9 @@ def f(x): f, engine="numba", raw=True ) + @pytest.mark.xfail( + raises=NotImplementedError, reason="method='table' is not supported." + ) def test_table_method_rolling_methods( self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators ): @@ -186,22 +189,13 @@ def test_table_method_rolling_methods( df = DataFrame(np.eye(3)) - with pytest.raises( - NotImplementedError, match="method='table' is not supported." - ): - getattr(df.rolling(2, method="table", axis=axis, min_periods=0), method)( - engine_kwargs=engine_kwargs, engine="numba" - ) - - # Once method='table' is supported, uncomment test below. - # - # result = getattr( - # df.rolling(2, method="table", axis=axis, min_periods=0), method - # )(engine_kwargs=engine_kwargs, engine="numba") - # expected = getattr( - # df.rolling(2, method="single", axis=axis, min_periods=0), method - # )(engine_kwargs=engine_kwargs, engine="numba") - # tm.assert_frame_equal(result, expected) + result = getattr( + df.rolling(2, method="table", axis=axis, min_periods=0), method + )(engine_kwargs=engine_kwargs, engine="numba") + expected = getattr( + df.rolling(2, method="single", axis=axis, min_periods=0), method + )(engine_kwargs=engine_kwargs, engine="numba") + tm.assert_frame_equal(result, expected) def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} @@ -253,6 +247,9 @@ def f(x): ) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail( + raises=NotImplementedError, reason="method='table' is not supported." + ) def test_table_method_expanding_methods( self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators ): @@ -262,19 +259,10 @@ def test_table_method_expanding_methods( df = DataFrame(np.eye(3)) - with pytest.raises( - NotImplementedError, match="method='table' is not supported." - ): - getattr(df.expanding(method="table", axis=axis), method)( - engine_kwargs=engine_kwargs, engine="numba" - ) - - # Once method='table' is supported, uncomment test below. - # - # result = getattr( - # df.expanding(method="table", axis=axis), method - # )(engine_kwargs=engine_kwargs, engine="numba") - # expected = getattr( - # df.expanding(method="single", axis=axis), method - # )(engine_kwargs=engine_kwargs, engine="numba") - # tm.assert_frame_equal(result, expected) + result = getattr(df.expanding(method="table", axis=axis), method)( + engine_kwargs=engine_kwargs, engine="numba" + ) + expected = getattr(df.expanding(method="single", axis=axis), method)( + engine_kwargs=engine_kwargs, engine="numba" + ) + tm.assert_frame_equal(result, expected)