diff --git a/ci/deps/azure-37-slow.yaml b/ci/deps/azure-37-slow.yaml index 05b33fa351ac9..5d097e397992c 100644 --- a/ci/deps/azure-37-slow.yaml +++ b/ci/deps/azure-37-slow.yaml @@ -36,3 +36,4 @@ dependencies: - xlwt - moto - flask + - numba diff --git a/ci/deps/azure-38-slow.yaml b/ci/deps/azure-38-slow.yaml index fd40f40294b7f..0a4107917f01a 100644 --- a/ci/deps/azure-38-slow.yaml +++ b/ci/deps/azure-38-slow.yaml @@ -34,3 +34,4 @@ dependencies: - xlwt - moto - flask + - numba diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 886469837d184..9e557a0020f1e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -37,7 +37,7 @@ For example: :class:`Rolling` and :class:`Expanding` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. -See ref:`window.overview` for performance and functional benefits. (:issue:`15095`) +See :ref:`window.overview` for performance and functional benefits. (:issue:`15095`, :issue:`38995`) .. 
_whatsnew_130.enhancements.other: diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 46b47b7e988c4..aa69d4fa675cd 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -1,3 +1,4 @@ +import functools from typing import Any, Callable, Dict, Optional, Tuple import numpy as np @@ -220,3 +221,21 @@ def roll_table( return result return roll_table + + +# This function will no longer be needed once numba supports +# axis for all np.nan* agg functions +# https://github.com/numba/numba/issues/1269 +@functools.lru_cache(maxsize=None) +def generate_manual_numpy_nan_agg_with_axis(nan_func): + numba = import_optional_dependency("numba") + + @numba.jit(nopython=True, nogil=True, parallel=True) + def nan_agg_with_axis(table): + result = np.empty(table.shape[1]) + for i in numba.prange(table.shape[1]): + partition = table[:, i] + result[i] = nan_func(partition) + return result + + return nan_agg_with_axis diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a4612a4c8ed5d..393c517a63660 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -65,6 +65,7 @@ VariableWindowIndexer, ) from pandas.core.window.numba_ import ( + generate_manual_numpy_nan_agg_with_axis, generate_numba_apply_func, generate_numba_table_func, ) @@ -1378,16 +1379,15 @@ def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("sum", args, kwargs) if maybe_use_numba(engine): if self.method == "table": - raise NotImplementedError("method='table' is not supported.") - # Once numba supports np.nansum with axis, args will be relevant. 
- # https://github.com/numba/numba/issues/6610 - args = () if self.method == "single" else (0,) + func = generate_manual_numpy_nan_agg_with_axis(np.nansum) + else: + func = np.nansum + return self.apply( - np.nansum, + func, raw=True, engine=engine, engine_kwargs=engine_kwargs, - args=args, ) window_func = window_aggregations.roll_sum return self._apply(window_func, name="sum", **kwargs) @@ -1424,16 +1424,15 @@ def max(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("max", args, kwargs) if maybe_use_numba(engine): if self.method == "table": - raise NotImplementedError("method='table' is not supported.") - # Once numba supports np.nanmax with axis, args will be relevant. - # https://github.com/numba/numba/issues/6610 - args = () if self.method == "single" else (0,) + func = generate_manual_numpy_nan_agg_with_axis(np.nanmax) + else: + func = np.nanmax + return self.apply( - np.nanmax, + func, raw=True, engine=engine, engine_kwargs=engine_kwargs, - args=args, ) window_func = window_aggregations.roll_max return self._apply(window_func, name="max", **kwargs) @@ -1496,16 +1495,15 @@ def min(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("min", args, kwargs) if maybe_use_numba(engine): if self.method == "table": - raise NotImplementedError("method='table' is not supported.") - # Once numba supports np.nanmin with axis, args will be relevant. 
- # https://github.com/numba/numba/issues/6610 - args = () if self.method == "single" else (0,) + func = generate_manual_numpy_nan_agg_with_axis(np.nanmin) + else: + func = np.nanmin + return self.apply( - np.nanmin, + func, raw=True, engine=engine, engine_kwargs=engine_kwargs, - args=args, ) window_func = window_aggregations.roll_min return self._apply(window_func, name="min", **kwargs) @@ -1514,16 +1512,15 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): nv.validate_window_func("mean", args, kwargs) if maybe_use_numba(engine): if self.method == "table": - raise NotImplementedError("method='table' is not supported.") - # Once numba supports np.nanmean with axis, args will be relevant. - # https://github.com/numba/numba/issues/6610 - args = () if self.method == "single" else (0,) + func = generate_manual_numpy_nan_agg_with_axis(np.nanmean) + else: + func = np.nanmean + return self.apply( - np.nanmean, + func, raw=True, engine=engine, engine_kwargs=engine_kwargs, - args=args, ) window_func = window_aggregations.roll_mean return self._apply(window_func, name="mean", **kwargs) @@ -1584,16 +1581,15 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): def median(self, engine=None, engine_kwargs=None, **kwargs): if maybe_use_numba(engine): if self.method == "table": - raise NotImplementedError("method='table' is not supported.") - # Once numba supports np.nanmedian with axis, args will be relevant. 
- # https://github.com/numba/numba/issues/6610 - args = () if self.method == "single" else (0,) + func = generate_manual_numpy_nan_agg_with_axis(np.nanmedian) + else: + func = np.nanmedian + return self.apply( - np.nanmedian, + func, raw=True, engine=engine, engine_kwargs=engine_kwargs, - args=args, ) window_func = window_aggregations.roll_median_c return self._apply(window_func, name="median", **kwargs) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 9d9c216801d73..173e39ef42908 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -163,6 +163,7 @@ def test_invalid_kwargs_nopython(): @td.skip_if_no("numba", "0.46.0") +@pytest.mark.slow @pytest.mark.filterwarnings("ignore:\\nThe keyword argument") # Filter warnings when parallel=True and the function can't be parallelized by Numba class TestTableMethod: @@ -177,9 +178,6 @@ def f(x): f, engine="numba", raw=True ) - @pytest.mark.xfail( - raises=NotImplementedError, reason="method='table' is not supported." - ) def test_table_method_rolling_methods( self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators ): @@ -247,9 +245,6 @@ def f(x): ) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail( - raises=NotImplementedError, reason="method='table' is not supported." - ) def test_table_method_expanding_methods( self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators ):