From f26de1b97df4781ccef8d7fac0f0d7cde6e8d49d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 26 Apr 2020 23:13:34 -0700 Subject: [PATCH 1/5] Fix docs for groupby.agg/transform --- doc/source/reference/groupby.rst | 9 ++++++--- pandas/core/groupby/groupby.py | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 921eb737aef07..201e33c2fd0ee 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -35,9 +35,12 @@ Function application :toctree: api/ GroupBy.apply - GroupBy.agg - GroupBy.aggregate - GroupBy.transform + SeriesGroupBy.agg + DataFrameGroupBy.agg + SeriesGroupBy.aggregate + DataFrameGroupBy.aggregate + SeriesGroupBy.transform + DataFrameGroupBy.transform GroupBy.pipe Computations / descriptive stats diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6924c7d320bc4..ff311aa3a162d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -313,11 +313,10 @@ class providing the base-class of operations. Examples -------- -# Same shape >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', ... 'foo', 'bar'], ... 'B' : ['one', 'one', 'two', 'three', -... 'two', 'two'], +... 'two', 'two'], ... 'C' : [1, 5, 5, 2, 5, 5], ... 'D' : [2.0, 5., 8., 1., 2., 9.]}) >>> grouped = df.groupby('A') @@ -330,7 +329,8 @@ class providing the base-class of operations. 
4 0.577350 -0.577350 5 0.577350 1.000000 -# Broadcastable +Broadcast result of the transformation + >>> grouped.transform(lambda x: x.max() - x.min()) C D 0 4 6.0 From f46ae3c2908c4262747db46ba02c6d8e3e3d7d9f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 27 Apr 2020 23:17:13 -0700 Subject: [PATCH 2/5] DOC: Fix groupby.agg/transform rst reference and numba references --- pandas/core/frame.py | 3 +++ pandas/core/generic.py | 6 ++++++ pandas/core/groupby/generic.py | 13 +++++++++++-- pandas/core/resample.py | 3 +++ pandas/core/series.py | 3 +++ pandas/core/util/numba_.py | 33 +++++++++++++++++++++++++++++++++ pandas/core/window/ewm.py | 3 +++ pandas/core/window/expanding.py | 3 +++ pandas/core/window/rolling.py | 6 ++++++ 9 files changed, 71 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4b4801f4e8c58..f360c66804889 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7105,6 +7105,9 @@ def _gotitem( see_also=_agg_summary_and_see_also_doc, examples=_agg_examples_doc, versionadded="\n.. versionadded:: 0.20.0\n", + numba_func_notes="", + numba_args="", + numba_notes="", **_shared_doc_kwargs, ) @Appender(_shared_docs["aggregate"]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b550857252466..07c3ddf145163 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5071,12 +5071,17 @@ def pipe(self, func, *args, **kwargs): Function to use for aggregating the data. If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. + %(numba_func_notes)s + Accepted combinations are: - function - string function name - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - dict of axis labels -> functions, function names or list of such. + + + %(numba_args)s %(axis)s *args Positional arguments to pass to `func`. @@ -5100,6 +5105,7 @@ def pipe(self, func, *args, **kwargs): `agg` is an alias for `aggregate`. Use the alias. 
A passed user-defined-function will be passed a Series for evaluation. + %(numba_notes)s %(examples)s""" ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ddf553dd1dd62..6bcaab22b9263 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -80,6 +80,9 @@ check_kwargs_and_nopython, get_jit_arguments, jit_user_function, + numba_groupby_args, + numba_groupby_func_notes, + numba_groupby_notes, split_for_numba, validate_udf, ) @@ -242,6 +245,9 @@ def apply(self, func, *args, **kwargs): versionadded="", klass="Series", axis="", + numba_func_notes=numba_groupby_func_notes, + numba_args=numba_groupby_args, + numba_notes=numba_groupby_notes, ) @Appender(_shared_docs["aggregate"]) def aggregate( @@ -476,7 +482,7 @@ def _aggregate_named(self, func, *args, **kwargs): return result - @Substitution(klass="Series", selected="A.") + @Substitution(klass="Series") @Appender(_transform_template) def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): func = self._get_cython_func(func) or func @@ -946,6 +952,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]): versionadded="", klass="DataFrame", axis="", + numba_func_notes=numba_groupby_func_notes, + numba_args=numba_groupby_args, + numba_notes=numba_groupby_notes, ) @Appender(_shared_docs["aggregate"]) def aggregate( @@ -1466,7 +1475,7 @@ def _transform_general( concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) return self._set_result_index_ordered(concatenated) - @Substitution(klass="DataFrame", selected="") + @Substitution(klass="DataFrame") @Appender(_transform_template) def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 06751d9c35fab..3feee63bfa05c 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -273,6 +273,9 @@ def pipe(self, func, *args, **kwargs): versionadded="", klass="DataFrame", axis="", + 
numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index eb409b432f89c..36a17a70c7b24 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3809,6 +3809,9 @@ def _gotitem(self, key, ndim, subset=None) -> "Series": see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded="\n.. versionadded:: 0.20.0\n", + numba_func_notes="", + numba_args="", + numba_notes="", **_shared_doc_kwargs, ) @Appender(generic._shared_docs["aggregate"]) diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py index c2e4b38ad5b4d..5155714a6d581 100644 --- a/pandas/core/util/numba_.py +++ b/pandas/core/util/numba_.py @@ -13,6 +13,39 @@ NUMBA_FUNC_CACHE: Dict[Tuple[Callable, str], Callable] = dict() +numba_groupby_func_notes = """ + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + .. versionchanged:: 1.1.0""" +numba_groupby_args = """ + + engine : str, default 'cython' + + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + + .. versionadded:: 1.1.0 + + engine_kwargs : dict, default None + + * For ``'cython'`` engine, there are no accepted ``engine_kwargs``. + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to the function. + + ..
versionadded:: 1.1.0""" +numba_groupby_notes = """ + When using ``engine='numba'``, there will be no "fall back" behavior internally. + The group data and group index will be passed as numpy arrays to the JITed + user defined function, and no alternative execution attempts will be tried.""" + + def check_kwargs_and_nopython( kwargs: Optional[Dict] = None, nopython: Optional[bool] = None ) -> None: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 2759280dc1d1c..5530a56d437be 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -188,6 +188,9 @@ def _constructor(self): versionadded="", klass="Series/Dataframe", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 146c139806bca..5980b496d28b6 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -120,6 +120,9 @@ def _get_window(self, other=None, **kwargs): versionadded="", klass="Series/Dataframe", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6c775953e18db..e4bd98fa8ba6c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1070,6 +1070,9 @@ def _get_window( versionadded="", klass="Series/DataFrame", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): @@ -1937,6 +1940,9 @@ def _validate_freq(self): versionadded="", klass="Series/Dataframe", axis="", + numba_func_notes="", + numba_args="", + numba_notes="", ) @Appender(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): From d5a4c01b15db1f558d06fe919464d8ba6cd0f2a0 Mon Sep 17 00:00:00 2001 
From: Matt Roeschke Date: Mon, 27 Apr 2020 23:18:14 -0700 Subject: [PATCH 3/5] Add additional note to transform --- pandas/core/groupby/groupby.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ff311aa3a162d..4ab6400e39c92 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -310,6 +310,10 @@ class providing the base-class of operations. * f must not mutate groups. Mutation is not supported and may produce unexpected results. +When using ``engine='numba'``, there will be no "fall back" behavior internally. +The group data and group index will be passed as numpy arrays to the JITed +user defined function, and no alternative execution attempts will be tried. + Examples -------- From ad9a0be98aa1746ff380860347ce0f7940443b92 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 27 Apr 2020 23:24:04 -0700 Subject: [PATCH 4/5] lint --- pandas/core/groupby/groupby.py | 2 +- pandas/core/util/numba_.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4ab6400e39c92..50fac4753eda7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -311,7 +311,7 @@ class providing the base-class of operations. produce unexpected results. When using ``engine='numba'``, there will be no "fall back" behavior internally. -The group data and group index will be passed as numpy arrays to the JITed +The group data and group index will be passed as numpy arrays to the JITed user defined function, and no alternative execution attempts will be tried. Examples diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py index 5155714a6d581..4b56e6bcf6386 100644 --- a/pandas/core/util/numba_.py +++ b/pandas/core/util/numba_.py @@ -19,7 +19,7 @@ first and second arguments respectively in the function signature. 
Each group's index will be passed to the user defined function and optionally available for use. - + .. versionchanged:: 1.1.0""" numba_groupby_args = """ @@ -42,7 +42,7 @@ .. versionadded:: 1.1.0""" numba_groupby_notes = """ When using ``engine='numba'``, there will be no "fall back" behavior internally. - The group data and group index will be passed as numpy arrays to the JITed + The group data and group index will be passed as numpy arrays to the JITed user defined function, and no alternative execution attempts will be tried.""" From e3844df769c41285e2874717a2bbf3feb6bf12d8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 28 Apr 2020 10:17:47 -0700 Subject: [PATCH 5/5] Fix docstring validation --- pandas/core/generic.py | 1 - pandas/core/groupby/generic.py | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 07c3ddf145163..4b03823b88cbc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5080,7 +5080,6 @@ def pipe(self, func, *args, **kwargs): - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - dict of axis labels -> functions, function names or list of such. - %(numba_args)s %(axis)s *args diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 6bcaab22b9263..0cd73cc56470f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -184,9 +184,9 @@ def _selection_name(self): """ See Also -------- - pandas.Series.groupby.apply - pandas.Series.groupby.transform - pandas.Series.aggregate + Series.groupby.apply + Series.groupby.transform + Series.aggregate """ ) @@ -864,9 +864,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]): """ See Also -------- - pandas.DataFrame.groupby.apply - pandas.DataFrame.groupby.transform - pandas.DataFrame.aggregate + DataFrame.groupby.apply + DataFrame.groupby.transform + DataFrame.aggregate """ )