Skip to content

ENH: Add numba engine to several rolling aggregations #38895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Jan 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
db2f86d
Add engine arguments to methods that support numpy nan methods
mroeschke Dec 30, 2020
8a7bbed
Route arguments through apply
mroeschke Dec 31, 2020
61f6f89
Merge remote-tracking branch 'upstream/master' into enh/rolling_table…
mroeschke Dec 31, 2020
10dd6aa
Add docs
mroeschke Dec 31, 2020
0707c28
Merge remote-tracking branch 'upstream/master' into enh/rolling_table…
mroeschke Jan 1, 2021
65607fc
Correct std and var signature. Add sem to fixture
mroeschke Jan 1, 2021
2ff5fe2
realized numba does not support axis or ddof arguments in np.nan* fun…
mroeschke Jan 2, 2021
eaee1cc
Move median func below
mroeschke Jan 2, 2021
cce6181
fix commented code
mroeschke Jan 2, 2021
7edd140
Remove numba engine from quantile
mroeschke Jan 2, 2021
344485f
Remove other arguments from quantile
mroeschke Jan 2, 2021
4ce2a78
Add numba engine tests single method test
mroeschke Jan 2, 2021
353fab6
Change to assert_series_equal
mroeschke Jan 2, 2021
33e0552
Add whatsnew note
mroeschke Jan 2, 2021
a56fe92
Add PR number
mroeschke Jan 2, 2021
fee8e2e
Merge remote-tracking branch 'upstream/master' into enh/rolling_table…
mroeschke Jan 2, 2021
6bc1333
Remove redundant doc section
mroeschke Jan 2, 2021
6bc330b
Merge remote-tracking branch 'upstream/master' into enh/rolling_table…
mroeschke Jan 4, 2021
76fc33f
Merge remote-tracking branch 'upstream/master' into enh/rolling_table…
mroeschke Jan 4, 2021
481bfd4
Merge remote-tracking branch 'upstream/master' into enh/rolling_table…
mroeschke Jan 4, 2021
178543b
Add ASV benchmarks
mroeschke Jan 4, 2021
d8582dd
Add engine arg
mroeschke Jan 4, 2021
49bae51
Add note in user_guide
mroeschke Jan 4, 2021
f0e5e59
Merge remote-tracking branch 'upstream/master' into enh/rolling_table…
mroeschke Jan 4, 2021
fc4656c
xfail instead of comment out
mroeschke Jan 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions asv_bench/benchmarks/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,24 @@ class Engine:
["int", "float"],
[np.sum, lambda x: np.sum(x) + 5],
["cython", "numba"],
["sum", "max", "min", "median", "mean"],
)
param_names = ["constructor", "dtype", "function", "engine"]
param_names = ["constructor", "dtype", "function", "engine", "method"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these benchmarks still reasonable in terms of total time, e.g. < 1s per benchmark?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each benchmark takes around 1s. Is that too much?


def setup(self, constructor, dtype, function, engine):
def setup(self, constructor, dtype, function, engine, method):
N = 10 ** 3
arr = (100 * np.random.random(N)).astype(dtype)
self.data = getattr(pd, constructor)(arr)

def time_rolling_apply(self, constructor, dtype, function, engine):
def time_rolling_apply(self, constructor, dtype, function, engine, method):
self.data.rolling(10).apply(function, raw=True, engine=engine)

def time_expanding_apply(self, constructor, dtype, function, engine):
def time_expanding_apply(self, constructor, dtype, function, engine, method):
self.data.expanding().apply(function, raw=True, engine=engine)

def time_rolling_methods(self, constructor, dtype, function, engine, method):
getattr(self.data.rolling(10), method)(engine=engine)


class ExpandingMethods:

Expand Down
4 changes: 4 additions & 0 deletions doc/source/user_guide/window.rst
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,10 @@ Numba will be applied in potentially two routines:
#. If ``func`` is a standard Python function, the engine will `JIT <https://numba.pydata.org/numba-doc/latest/user/overview.html>`__ the passed function. ``func`` can also be a JITed function in which case the engine will not JIT the function again.
#. The engine will JIT the for loop where the apply function is applied to each window.

.. versionadded:: 1.3

``mean``, ``median``, ``max``, ``min``, and ``sum`` also support the ``engine`` and ``engine_kwargs`` arguments.

The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the
`numba.jit decorator <https://numba.pydata.org/numba-doc/latest/reference/jit-compilation.html#numba.jit>`__.
These keyword arguments will be applied to *both* the passed function (if a standard Python function)
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Other enhancements
- Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`)
- :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth adding a note in the user docs as well.


.. ---------------------------------------------------------------------------

Expand Down
31 changes: 19 additions & 12 deletions pandas/core/window/expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,33 +172,33 @@ def apply(

@Substitution(name="expanding")
@Appender(_shared_docs["sum"])
def sum(self, *args, **kwargs):
def sum(self, *args, engine=None, engine_kwargs=None, **kwargs):
nv.validate_expanding_func("sum", args, kwargs)
return super().sum(*args, **kwargs)
return super().sum(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

@Substitution(name="expanding", func_name="max")
@Appender(_doc_template)
@Appender(_shared_docs["max"])
def max(self, *args, **kwargs):
def max(self, *args, engine=None, engine_kwargs=None, **kwargs):
nv.validate_expanding_func("max", args, kwargs)
return super().max(*args, **kwargs)
return super().max(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

@Substitution(name="expanding")
@Appender(_shared_docs["min"])
def min(self, *args, **kwargs):
def min(self, *args, engine=None, engine_kwargs=None, **kwargs):
nv.validate_expanding_func("min", args, kwargs)
return super().min(*args, **kwargs)
return super().min(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

@Substitution(name="expanding")
@Appender(_shared_docs["mean"])
def mean(self, *args, **kwargs):
def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
nv.validate_expanding_func("mean", args, kwargs)
return super().mean(*args, **kwargs)
return super().mean(*args, engine=engine, engine_kwargs=engine_kwargs, **kwargs)

@Substitution(name="expanding")
@Appender(_shared_docs["median"])
def median(self, **kwargs):
return super().median(**kwargs)
def median(self, engine=None, engine_kwargs=None, **kwargs):
return super().median(engine=engine, engine_kwargs=engine_kwargs, **kwargs)

@Substitution(name="expanding", versionadded="")
@Appender(_shared_docs["std"])
Expand Down Expand Up @@ -256,9 +256,16 @@ def kurt(self, **kwargs):

@Substitution(name="expanding")
@Appender(_shared_docs["quantile"])
def quantile(self, quantile, interpolation="linear", **kwargs):
def quantile(
self,
quantile,
interpolation="linear",
**kwargs,
):
return super().quantile(
quantile=quantile, interpolation=interpolation, **kwargs
quantile=quantile,
interpolation=interpolation,
**kwargs,
)

@Substitution(name="expanding", func_name="cov")
Expand Down
Loading