From 0cb2603622b77f80bdb9310e53ec1b60dce529da Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sat, 10 Mar 2018 17:55:28 +0100 Subject: [PATCH 1/5] DOC: update the aggregate docstring --- pandas/core/frame.py | 49 ++++++++++++++++++++++++++---------------- pandas/core/generic.py | 35 +++++++++++++++--------------- pandas/core/series.py | 7 ++++++ 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..4894c5fa1b170 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4764,36 +4764,49 @@ def _gotitem(self, key, ndim, subset=None): return self[key] _agg_doc = dedent(""" + Notes + ----- + The default behavior of aggregating over the axis 0 is different from + `numpy` functions `mean`/`median`/`prod`/`sum`/`std`/`var`, where the + default is to compute the aggregation of the flattened array (e.g., + `numpy.mean(arr_2d)` as opposed to `numpy.mean(arr_2d, axis=0)`). + + `agg` is an alias for `aggregate`. Use the alias. + Examples -------- - >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], - ... index=pd.date_range('1/1/2000', periods=10)) - >>> df.iloc[3:7] = np.nan + >>> df = df = pd.DataFrame([[1,2,3], + ... [4,5,6], + ... [7,8,9], + ... [np.nan, np.nan, np.nan]], + ... 
columns=['A', 'B', 'C']) Aggregate these functions across all columns - >>> df.agg(['sum', 'min']) - A B C - sum -0.182253 -0.614014 -2.909534 - min -1.916563 -1.460076 -1.568297 + >>> df.aggregate(['sum', 'min']) + A B C + sum 12.0 15.0 18.0 + min 1.0 2.0 3.0 Different aggregations per column - >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) - A B - max NaN 1.514318 - min -1.916563 -1.460076 - sum -0.182253 NaN + >>> df.aggregate({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) + A B + max NaN 8.0 + min 1.0 2.0 + sum 12.0 NaN See also -------- - pandas.DataFrame.apply - pandas.DataFrame.transform - pandas.DataFrame.groupby.aggregate - pandas.DataFrame.resample.aggregate - pandas.DataFrame.rolling.aggregate - + pandas.DataFrame.apply : Perform any type of operations. + pandas.DataFrame.transform : Perform transformation type operations. + pandas.DataFrame.groupby.aggregate : Perform aggregation type operations + over groups. + pandas.DataFrame.resample.aggregate : Perform aggregation type operations + over resampled bins. + pandas.DataFrame.rolling.aggregate : Perform aggregation type operations + over rolling window. """) @Appender(_agg_doc) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a893b2ba1a189..4e648e5a28930 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3929,36 +3929,38 @@ def pipe(self, func, *args, **kwargs): return com._pipe(self, func, *args, **kwargs) _shared_docs['aggregate'] = (""" - Aggregate using callable, string, dict, or list of string/callables + Aggregate using one or multiple operations along the specified axis. %(versionadded)s Parameters ---------- - func : callable, string, dictionary, or list of string/callables + func : function, string, dictionary, or list of string/functions Function to use for aggregating the data. If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. For a DataFrame, can pass a dict, if the keys are DataFrame column names. 
- Accepted Combinations are: - - - string function name - - function - - list of functions - - dict of column names -> functions (or list of functions) - - Notes - ----- - Numpy functions mean/median/prod/sum/std/var are special cased so the - default behavior is applying the function along axis=0 - (e.g., np.mean(arr_2d, axis=0)) as opposed to - mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). + Accepted combinations are: - `agg` is an alias for `aggregate`. Use the alias. + - string function name. + - function. + - list of functions. + - dict of column names -> functions (or list of functions). + axis : {0 or 'index', 1 or 'columns'}, default 0 + - 0 or 'index': apply function to each column. + - 1 or 'columns': apply function to each row. + args + Optional positional arguments to pass to the function. + kwargs + Optional keyword arguments to pass to the function. Returns ------- aggregated : %(klass)s + + Notes + ----- + `agg` is an alias for `aggregate`. Use the alias. """) _shared_docs['transform'] = (""" @@ -4006,7 +4008,6 @@ def pipe(self, func, *args, **kwargs): -------- pandas.%(klass)s.aggregate pandas.%(klass)s.apply - """) # ---------------------------------------------------------------------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 069f0372ab6e1..25308261b4bee 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2353,6 +2353,13 @@ def _gotitem(self, key, ndim, subset=None): return self _agg_doc = dedent(""" + Notes + ----- + The only possible value for axis is 0 or 'index' because + :class:`~pandas.Series` has only one axis. + + `agg` is an alias for `aggregate`. Use the alias. 
+ Examples -------- From 93798170cc3c90888a70275ff8a27e2621fdc973 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sun, 11 Mar 2018 15:50:39 +0100 Subject: [PATCH 2/5] Fix axis issue --- pandas/core/frame.py | 4 ++++ pandas/core/generic.py | 4 +--- pandas/core/groupby.py | 6 ++++-- pandas/core/resample.py | 3 ++- pandas/core/series.py | 11 ++++------- pandas/core/window.py | 12 ++++++++---- 6 files changed, 23 insertions(+), 17 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4894c5fa1b170..cd90f54e84403 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -107,6 +107,10 @@ _shared_doc_kwargs = dict( axes='index, columns', klass='DataFrame', axes_single_arg="{0 or 'index', 1 or 'columns'}", + axis=""" + axis : {0 or 'index', 1 or 'columns'}, default 0 + - 0 or 'index': apply function to each column. + - 1 or 'columns': apply function to each row.""", optional_by=""" by : str or list of str Name or list of names to sort by. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4e648e5a28930..8d4f325ee4669 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3946,9 +3946,7 @@ def pipe(self, func, *args, **kwargs): - function. - list of functions. - dict of column names -> functions (or list of functions). - axis : {0 or 'index', 1 or 'columns'}, default 0 - - 0 or 'index': apply function to each column. - - 1 or 'columns': apply function to each row. + %(axis)s args Optional positional arguments to pass to the function. 
kwargs diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 285c5786b532b..6a36c3d8a9f6a 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3431,7 +3431,8 @@ def apply(self, func, *args, **kwargs): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='Series', - versionadded='')) + versionadded='', + axis='')) def aggregate(self, func_or_funcs, *args, **kwargs): _level = kwargs.pop('_level', None) if isinstance(func_or_funcs, compat.string_types): @@ -4610,7 +4611,8 @@ class DataFrameGroupBy(NDFrameGroupBy): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='DataFrame', - versionadded='')) + versionadded='', + axis='')) def aggregate(self, arg, *args, **kwargs): return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 772568ee84737..4576bb86bed52 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -334,7 +334,8 @@ def plot(self, *args, **kwargs): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='DataFrame', - versionadded='')) + versionadded='', + axis='')) def aggregate(self, arg, *args, **kwargs): self._set_binner() diff --git a/pandas/core/series.py b/pandas/core/series.py index 25308261b4bee..1f22aaea9e1af 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -77,6 +77,10 @@ _shared_doc_kwargs = dict( axes='index', klass='Series', axes_single_arg="{0, 'index'}", + axis=""" + axis : {0 or 'index'} + Parameter needed for compatibility. + """, inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""", unique='np.ndarray', duplicated='Series', @@ -2353,13 +2357,6 @@ def _gotitem(self, key, ndim, subset=None): return self _agg_doc = dedent(""" - Notes - ----- - The only possible value for axis is 0 or 'index' because - :class:`~pandas.Series` has only one axis. - - `agg` is an alias for `aggregate`. Use the alias. 
- Examples -------- diff --git a/pandas/core/window.py b/pandas/core/window.py index c41b07759d555..ec4765270a5d8 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -626,7 +626,8 @@ def f(arg, *args, **kwargs): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', - klass='Series/DataFrame')) + klass='Series/DataFrame', + axis='')) def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) if result is None: @@ -1192,7 +1193,8 @@ def _validate_freq(self): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', - klass='Series/DataFrame')) + klass='Series/DataFrame', + axis='')) def aggregate(self, arg, *args, **kwargs): return super(Rolling, self).aggregate(arg, *args, **kwargs) @@ -1436,7 +1438,8 @@ def _get_window(self, other=None): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', - klass='Series/DataFrame')) + klass='Series/DataFrame', + axis='')) def aggregate(self, arg, *args, **kwargs): return super(Expanding, self).aggregate(arg, *args, **kwargs) @@ -1717,7 +1720,8 @@ def _constructor(self): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', - klass='Series/DataFrame')) + klass='Series/DataFrame', + axis='')) def aggregate(self, arg, *args, **kwargs): return super(EWM, self).aggregate(arg, *args, **kwargs) From 56cce015178ea06c31c43ee60c90ca1acdb7806f Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Sun, 11 Mar 2018 18:03:11 +0100 Subject: [PATCH 3/5] Address requested changes --- pandas/core/frame.py | 40 +++++++++++++++++++++------------------- pandas/core/generic.py | 10 +++++----- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cd90f54e84403..cdf33d14fcc39 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4770,32 +4770,34 @@ def _gotitem(self, key, ndim, subset=None): _agg_doc = dedent(""" Notes 
----- - The default behavior of aggregating over the axis 0 is different from - `numpy` functions `mean`/`median`/`prod`/`sum`/`std`/`var`, where the - default is to compute the aggregation of the flattened array (e.g., - `numpy.mean(arr_2d)` as opposed to `numpy.mean(arr_2d, axis=0)`). + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to ``numpy.mean(arr_2d, + axis=0)``. `agg` is an alias for `aggregate`. Use the alias. Examples -------- - >>> df = df = pd.DataFrame([[1,2,3], - ... [4,5,6], - ... [7,8,9], - ... [np.nan, np.nan, np.nan]], - ... columns=['A', 'B', 'C']) + >>> df = pd.DataFrame([[1,2,3], + ... [4,5,6], + ... [7,8,9], + ... [np.nan, np.nan, np.nan]], + ... columns=['A', 'B', 'C']) Aggregate these functions across all columns - >>> df.aggregate(['sum', 'min']) + >>> df.agg(['sum', 'min']) A B C sum 12.0 15.0 18.0 min 1.0 2.0 3.0 Different aggregations per column - >>> df.aggregate({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) + >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) A B max NaN 8.0 min 1.0 2.0 @@ -4803,14 +4805,14 @@ def _gotitem(self, key, ndim, subset=None): See also -------- - pandas.DataFrame.apply : Perform any type of operations. - pandas.DataFrame.transform : Perform transformation type operations. - pandas.DataFrame.groupby.aggregate : Perform aggregation type operations - over groups. - pandas.DataFrame.resample.aggregate : Perform aggregation type operations - over resampled bins. - pandas.DataFrame.rolling.aggregate : Perform aggregation type operations - over rolling window. + DataFrame.apply : Perform any type of operations. + DataFrame.transform : Perform transformation type operations. 
+ pandas.core.groupby.GroupBy : Perform operations over groups. + pandas.core.resample.Resampler : Perform operations over resampled bins. + pandas.core.window.Rolling : Perform operations over rolling window. + pandas.core.window.Expanding : Perform operations over expanding window. + pandas.core.window.EWM : Perform operation over exponential weighted + window. """) @Appender(_agg_doc) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d4f325ee4669..90edb36f6e4fb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3929,7 +3929,7 @@ def pipe(self, func, *args, **kwargs): return com._pipe(self, func, *args, **kwargs) _shared_docs['aggregate'] = (""" - Aggregate using one or multiple operations along the specified axis. + Aggregate using one or more operations over the specified axis. %(versionadded)s @@ -3947,10 +3947,10 @@ def pipe(self, func, *args, **kwargs): - list of functions. - dict of column names -> functions (or list of functions). %(axis)s - args - Optional positional arguments to pass to the function. - kwargs - Optional keyword arguments to pass to the function. + *args + Positional arguments to pass to the function. + **kwargs + Keyword arguments to pass to the function. Returns ------- From a8c992afc4f340ca076e45fc2a9e5bdb5c9ed6c7 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral Date: Tue, 13 Mar 2018 06:48:44 +0100 Subject: [PATCH 4/5] Fix line too long --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9452b43c4735b..d28e7f557d67d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4463,10 +4463,10 @@ def pivot(self, index=None, columns=None, values=None): Return reshaped DataFrame organized by given index / column values. Reshape data (produce a "pivot" table) based on column values. Uses - unique values from specified `index` / `columns` to form axes of the resulting - DataFrame. 
This function does not support data aggregation, multiple - values will result in a MultiIndex in the columns. See the - :ref:`User Guide <reshaping>` for more on reshaping. + unique values from specified `index` / `columns` to form axes of the + resulting DataFrame. This function does not support data + aggregation, multiple values will result in a MultiIndex in the + columns. See the :ref:`User Guide <reshaping>` for more on reshaping. Parameters ---------- From e1a0c27c11cda12a942b36344edff5ab8c66728d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Mar 2018 09:09:03 -0500 Subject: [PATCH 5/5] Updates PEP8. Added axis='columns' example. --- pandas/core/frame.py | 20 ++++++++++++++------ pandas/core/generic.py | 5 +++-- pandas/core/series.py | 2 +- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d28e7f557d67d..12a7141bdb0ee 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4997,21 +4997,20 @@ def _gotitem(self, key, ndim, subset=None): Examples -------- - - >>> df = pd.DataFrame([[1,2,3], - ... [4,5,6], - ... [7,8,9], + >>> df = pd.DataFrame([[1, 2, 3], + ... [4, 5, 6], + ... [7, 8, 9], ... [np.nan, np.nan, np.nan]], ... columns=['A', 'B', 'C']) - Aggregate these functions across all columns + Aggregate these functions over the rows. >>> df.agg(['sum', 'min']) A B C sum 12.0 15.0 18.0 min 1.0 2.0 3.0 - Different aggregations per column + Different aggregations per column. >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) A B @@ -5019,6 +5018,15 @@ def _gotitem(self, key, ndim, subset=None): min 1.0 2.0 sum 12.0 NaN + Aggregate over the columns. + + >>> df.agg("mean", axis="columns") + 0 2.0 + 1 5.0 + 2 8.0 + 3 NaN + dtype: float64 + See also -------- DataFrame.apply : Perform any type of operations. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d0a2236d24e8a..494351dd27ca5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3954,11 +3954,12 @@ def pipe(self, func, *args, **kwargs): - function. - list of functions. - dict of column names -> functions (or list of functions). + %(axis)s *args - Positional arguments to pass to the function. + Positional arguments to pass to `func`. **kwargs - Keyword arguments to pass to the function. + Keyword arguments to pass to `func`. Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 6cc4e29df77fb..4d6bbedc51922 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -79,7 +79,7 @@ axes='index', klass='Series', axes_single_arg="{0 or 'index'}", axis=""" axis : {0 or 'index'} - Parameter needed for compatibility. + Parameter needed for compatibility with DataFrame. """, inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""",