Skip to content

DOC: update the aggregate docstring #20276

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 49 additions & 22 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@
_shared_doc_kwargs = dict(
axes='index, columns', klass='DataFrame',
axes_single_arg="{0 or 'index', 1 or 'columns'}",
axis="""
axis : {0 or 'index', 1 or 'columns'}, default 0
- 0 or 'index': apply function to each column.
- 1 or 'columns': apply function to each row.""",
optional_by="""
by : str or list of str
Name or list of names to sort by.
Expand Down Expand Up @@ -4460,9 +4464,9 @@ def pivot(self, index=None, columns=None, values=None):

Reshape data (produce a "pivot" table) based on column values. Uses
unique values from specified `index` / `columns` to form axes of the
resulting DataFrame. This function does not support data aggregation,
multiple values will result in a MultiIndex in the columns. See the
:ref:`User Guide <reshaping>` for more on reshaping.
resulting DataFrame. This function does not support data
aggregation; multiple values will result in a MultiIndex in the
columns. See the :ref:`User Guide <reshaping>` for more on reshaping.

Parameters
----------
Expand Down Expand Up @@ -4980,36 +4984,59 @@ def _gotitem(self, key, ndim, subset=None):
return self[key]

_agg_doc = dedent("""
Notes
-----
The aggregation operations are always performed over an axis, either the
index (default) or the column axis. This behavior is different from
`numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`,
`var`), where the default is to compute the aggregation of the flattened
array, e.g., ``numpy.mean(arr_2d)`` as opposed to ``numpy.mean(arr_2d,
axis=0)``.

`agg` is an alias for `aggregate`. Use the alias.

Examples
--------
>>> df = pd.DataFrame([[1, 2, 3],
... [4, 5, 6],
... [7, 8, 9],
... [np.nan, np.nan, np.nan]],
... columns=['A', 'B', 'C'])

>>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
... index=pd.date_range('1/1/2000', periods=10))
>>> df.iloc[3:7] = np.nan

Aggregate these functions across all columns
Aggregate these functions over the rows.

>>> df.agg(['sum', 'min'])
A B C
sum -0.182253 -0.614014 -2.909534
min -1.916563 -1.460076 -1.568297
A B C
sum 12.0 15.0 18.0
min 1.0 2.0 3.0

Different aggregations per column
Different aggregations per column.

>>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
A B
max NaN 1.514318
min -1.916563 -1.460076
sum -0.182253 NaN
A B
max NaN 8.0
min 1.0 2.0
sum 12.0 NaN

Aggregate over the columns.

>>> df.agg("mean", axis="columns")
0 2.0
1 5.0
2 8.0
3 NaN
dtype: float64

See Also
--------
pandas.DataFrame.apply
pandas.DataFrame.transform
pandas.DataFrame.groupby.aggregate
pandas.DataFrame.resample.aggregate
pandas.DataFrame.rolling.aggregate

DataFrame.apply : Perform any type of operation.
DataFrame.transform : Perform transformation-type operations.
pandas.core.groupby.GroupBy : Perform operations over groups.
pandas.core.resample.Resampler : Perform operations over resampled bins.
pandas.core.window.Rolling : Perform operations over rolling window.
pandas.core.window.Expanding : Perform operations over expanding window.
pandas.core.window.EWM : Perform operations over exponentially weighted
window.
""")

@Appender(_agg_doc)
Expand Down
32 changes: 16 additions & 16 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3937,36 +3937,37 @@ def pipe(self, func, *args, **kwargs):
return com._pipe(self, func, *args, **kwargs)

_shared_docs['aggregate'] = ("""
Aggregate using callable, string, dict, or list of string/callables
Aggregate using one or more operations over the specified axis.

%(versionadded)s

Parameters
----------
func : callable, string, dictionary, or list of string/callables
func : function, string, dictionary, or list of string/functions
Function to use for aggregating the data. If a function, must either
work when passed a %(klass)s or when passed to %(klass)s.apply. For
a DataFrame, can pass a dict, if the keys are DataFrame column names.

Accepted Combinations are:
Accepted combinations are:

- string function name
- function
- list of functions
- dict of column names -> functions (or list of functions)
- string function name.
- function.
- list of functions.
- dict of column names -> functions (or list of functions).

Notes
-----
Numpy functions mean/median/prod/sum/std/var are special cased so the
default behavior is applying the function along axis=0
(e.g., np.mean(arr_2d, axis=0)) as opposed to
mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).

`agg` is an alias for `aggregate`. Use the alias.
%(axis)s
*args
Positional arguments to pass to `func`.
**kwargs
Keyword arguments to pass to `func`.

Returns
-------
aggregated : %(klass)s

Notes
-----
`agg` is an alias for `aggregate`. Use the alias.
""")

_shared_docs['transform'] = ("""
Expand Down Expand Up @@ -4014,7 +4015,6 @@ def pipe(self, func, *args, **kwargs):
--------
pandas.%(klass)s.aggregate
pandas.%(klass)s.apply

""")

# ----------------------------------------------------------------------
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3432,7 +3432,8 @@ def apply(self, func, *args, **kwargs):
@Appender(_agg_doc)
@Appender(_shared_docs['aggregate'] % dict(
klass='Series',
versionadded=''))
versionadded='',
axis=''))
def aggregate(self, func_or_funcs, *args, **kwargs):
_level = kwargs.pop('_level', None)
if isinstance(func_or_funcs, compat.string_types):
Expand Down Expand Up @@ -4611,7 +4612,8 @@ class DataFrameGroupBy(NDFrameGroupBy):
@Appender(_agg_doc)
@Appender(_shared_docs['aggregate'] % dict(
klass='DataFrame',
versionadded=''))
versionadded='',
axis=''))
def aggregate(self, arg, *args, **kwargs):
return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,8 @@ def plot(self, *args, **kwargs):
@Appender(_agg_doc)
@Appender(_shared_docs['aggregate'] % dict(
klass='DataFrame',
versionadded=''))
versionadded='',
axis=''))
def aggregate(self, arg, *args, **kwargs):

self._set_binner()
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@

_shared_doc_kwargs = dict(
axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
axis="""
axis : {0 or 'index'}
Parameter needed for compatibility with DataFrame.
""",
inplace="""inplace : boolean, default False
If True, performs operation inplace and returns None.""",
unique='np.ndarray', duplicated='Series',
Expand Down
12 changes: 8 additions & 4 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,8 @@ def f(arg, *args, **kwargs):
@Appender(_agg_doc)
@Appender(_shared_docs['aggregate'] % dict(
versionadded='',
klass='Series/DataFrame'))
klass='Series/DataFrame',
axis=''))
def aggregate(self, arg, *args, **kwargs):
result, how = self._aggregate(arg, *args, **kwargs)
if result is None:
Expand Down Expand Up @@ -1300,7 +1301,8 @@ def _validate_freq(self):
@Appender(_agg_doc)
@Appender(_shared_docs['aggregate'] % dict(
versionadded='',
klass='Series/DataFrame'))
klass='Series/DataFrame',
axis=''))
def aggregate(self, arg, *args, **kwargs):
return super(Rolling, self).aggregate(arg, *args, **kwargs)

Expand Down Expand Up @@ -1566,7 +1568,8 @@ def _get_window(self, other=None):
@Appender(_agg_doc)
@Appender(_shared_docs['aggregate'] % dict(
versionadded='',
klass='Series/DataFrame'))
klass='Series/DataFrame',
axis=''))
def aggregate(self, arg, *args, **kwargs):
return super(Expanding, self).aggregate(arg, *args, **kwargs)

Expand Down Expand Up @@ -1869,7 +1872,8 @@ def _constructor(self):
@Appender(_agg_doc)
@Appender(_shared_docs['aggregate'] % dict(
versionadded='',
klass='Series/DataFrame'))
klass='Series/DataFrame',
axis=''))
def aggregate(self, arg, *args, **kwargs):
return super(EWM, self).aggregate(arg, *args, **kwargs)

Expand Down