diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 921eb737aef07..201e33c2fd0ee 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -35,9 +35,12 @@ Function application :toctree: api/ GroupBy.apply - GroupBy.agg - GroupBy.aggregate - GroupBy.transform + SeriesGroupBy.agg + DataFrameGroupBy.agg + SeriesGroupBy.aggregate + DataFrameGroupBy.aggregate + SeriesGroupBy.transform + DataFrameGroupBy.transform GroupBy.pipe Computations / descriptive stats diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4b4801f4e8c58..f360c66804889 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7105,6 +7105,9 @@ def _gotitem( see_also=_agg_summary_and_see_also_doc, examples=_agg_examples_doc, versionadded="\n.. versionadded:: 0.20.0\n", + numba_func_notes="", + numba_args="", + numba_notes="", **_shared_doc_kwargs, ) @Appender(_shared_docs["aggregate"]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b550857252466..4b03823b88cbc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5071,12 +5071,16 @@ def pipe(self, func, *args, **kwargs): Function to use for aggregating the data. If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. + %(numba_func_notes)s + Accepted combinations are: - function - string function name - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - dict of axis labels -> functions, function names or list of such. + + %(numba_args)s %(axis)s *args Positional arguments to pass to `func`. @@ -5100,6 +5104,7 @@ def pipe(self, func, *args, **kwargs): `agg` is an alias for `aggregate`. Use the alias. A passed user-defined-function will be passed a Series for evaluation. + %(numba_notes)s %(examples)s""" ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ddf553dd1dd62..0cd73cc56470f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -80,6 +80,9 @@ check_kwargs_and_nopython, get_jit_arguments, jit_user_function, + numba_groupby_args, + numba_groupby_func_notes, + numba_groupby_notes, split_for_numba, validate_udf, ) @@ -181,9 +184,9 @@ def _selection_name(self): """ See Also -------- - pandas.Series.groupby.apply - pandas.Series.groupby.transform - pandas.Series.aggregate + Series.groupby.apply + Series.groupby.transform + Series.aggregate """ ) @@ -242,6 +245,9 @@ def apply(self, func, *args, **kwargs): versionadded="", klass="Series", axis="", + numba_func_notes=numba_groupby_func_notes, + numba_args=numba_groupby_args, + numba_notes=numba_groupby_notes, ) @Appender(_shared_docs["aggregate"]) def aggregate( @@ -476,7 +482,7 @@ def _aggregate_named(self, func, *args, **kwargs): return result - @Substitution(klass="Series", selected="A.") + @Substitution(klass="Series") @Appender(_transform_template) def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): func = self._get_cython_func(func) or func @@ -858,9 +864,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]): """ See Also -------- - pandas.DataFrame.groupby.apply - pandas.DataFrame.groupby.transform - pandas.DataFrame.aggregate + DataFrame.groupby.apply + DataFrame.groupby.transform + DataFrame.aggregate """ ) @@ -946,6 +952,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]): versionadded="", klass="DataFrame", axis="", + numba_func_notes=numba_groupby_func_notes, + numba_args=numba_groupby_args, + numba_notes=numba_groupby_notes, ) @Appender(_shared_docs["aggregate"]) def aggregate( @@ -1466,7 +1475,7 @@ def _transform_general( concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) return self._set_result_index_ordered(concatenated) - @Substitution(klass="DataFrame", selected="") + @Substitution(klass="DataFrame") @Appender(_transform_template) def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6924c7d320bc4..50fac4753eda7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -310,14 +310,17 @@ class providing the base-class of operations. * f must not mutate groups. Mutation is not supported and may produce unexpected results. +When using ``engine='numba'``, there will be no "fall back" behavior internally. +The group data and group index will be passed as numpy arrays to the JITed +user defined function, and no alternative execution attempts will be tried. + Examples -------- -# Same shape >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', ... 'foo', 'bar'], ... 'B' : ['one', 'one', 'two', 'three', -... 'two', 'two'], +... 'two', 'two'], ... 'C' : [1, 5, 5, 2, 5, 5], ... 'D' : [2.0, 5., 8., 1., 2., 9.]}) >>> grouped = df.groupby('A') @@ -330,7 +333,8 @@ class providing the base-class of operations. 4 0.577350 -0.577350 5 0.577350 1.000000 -# Broadcastable +Broadcast result of the transformation + >>> grouped.transform(lambda x: x.max() - x.min()) C D 0 4 6.0 diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 06751d9c35fab..3feee63bfa05c 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -273,6 +273,9 @@ def pipe(self, func, *args, **kwargs): versionadded="", klass="DataFrame", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index eb409b432f89c..36a17a70c7b24 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3809,6 +3809,9 @@ def _gotitem(self, key, ndim, subset=None) -> "Series": see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded="\n.. versionadded:: 0.20.0\n", + numba_func_notes="", + numba_args="", + numba_notes="", **_shared_doc_kwargs, ) @Appender(generic._shared_docs["aggregate"]) diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py index c2e4b38ad5b4d..4b56e6bcf6386 100644 --- a/pandas/core/util/numba_.py +++ b/pandas/core/util/numba_.py @@ -13,6 +13,39 @@ NUMBA_FUNC_CACHE: Dict[Tuple[Callable, str], Callable] = dict() +numba_groupby_func_notes = """ + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + .. versionchanged:: 1.1.0""" +numba_groupby_args = """ + + engine : str, default 'cython' + + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + + .. versionadded:: 1.1.0 + + engine_kwargs : dict, default None + + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to the function + + .. versionadded:: 1.1.0""" +numba_groupby_notes = """ + When using ``engine='numba'``, there will be no "fall back" behavior internally. + The group data and group index will be passed as numpy arrays to the JITed + user defined function, and no alternative execution attempts will be tried.""" + + def check_kwargs_and_nopython( kwargs: Optional[Dict] = None, nopython: Optional[bool] = None ) -> None: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 2759280dc1d1c..5530a56d437be 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -188,6 +188,9 @@ def _constructor(self): versionadded="", klass="Series/Dataframe", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 146c139806bca..5980b496d28b6 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -120,6 +120,9 @@ def _get_window(self, other=None, **kwargs): versionadded="", klass="Series/Dataframe", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6c775953e18db..e4bd98fa8ba6c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1070,6 +1070,9 @@ def _get_window( versionadded="", klass="Series/DataFrame", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): @@ -1937,6 +1940,9 @@ def _validate_freq(self): versionadded="", klass="Series/Dataframe", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs):