diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79265e35ef6e6..12a7141bdb0ee 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -107,6 +107,10 @@ _shared_doc_kwargs = dict( axes='index, columns', klass='DataFrame', axes_single_arg="{0 or 'index', 1 or 'columns'}", + axis=""" + axis : {0 or 'index', 1 or 'columns'}, default 0 + - 0 or 'index': apply function to each column. + - 1 or 'columns': apply function to each row.""", optional_by=""" by : str or list of str Name or list of names to sort by. @@ -4460,9 +4464,9 @@ def pivot(self, index=None, columns=None, values=None): Reshape data (produce a "pivot" table) based on column values. Uses unique values from specified `index` / `columns` to form axes of the - resulting DataFrame. This function does not support data aggregation, - multiple values will result in a MultiIndex in the columns. See the - :ref:`User Guide <reshaping>` for more on reshaping. + resulting DataFrame. This function does not support data + aggregation, multiple values will result in a MultiIndex in the + columns. See the :ref:`User Guide <reshaping>` for more on reshaping. Parameters ---------- @@ -4980,36 +4984,59 @@ def _gotitem(self, key, ndim, subset=None): return self[key] _agg_doc = dedent(""" + Notes + ----- + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to ``numpy.mean(arr_2d, + axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + Examples -------- + >>> df = pd.DataFrame([[1, 2, 3], + ... [4, 5, 6], + ... [7, 8, 9], + ... [np.nan, np.nan, np.nan]], + ... columns=['A', 'B', 'C']) - >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], - ... 
index=pd.date_range('1/1/2000', periods=10)) - >>> df.iloc[3:7] = np.nan - - Aggregate these functions across all columns + Aggregate these functions over the rows. >>> df.agg(['sum', 'min']) - A B C - sum -0.182253 -0.614014 -2.909534 - min -1.916563 -1.460076 -1.568297 + A B C + sum 12.0 15.0 18.0 + min 1.0 2.0 3.0 - Different aggregations per column + Different aggregations per column. >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) - A B - max NaN 1.514318 - min -1.916563 -1.460076 - sum -0.182253 NaN + A B + max NaN 8.0 + min 1.0 2.0 + sum 12.0 NaN + + Aggregate over the columns. + + >>> df.agg("mean", axis="columns") + 0 2.0 + 1 5.0 + 2 8.0 + 3 NaN + dtype: float64 See also -------- - pandas.DataFrame.apply - pandas.DataFrame.transform - pandas.DataFrame.groupby.aggregate - pandas.DataFrame.resample.aggregate - pandas.DataFrame.rolling.aggregate - + DataFrame.apply : Perform any type of operations. + DataFrame.transform : Perform transformation type operations. + pandas.core.groupby.GroupBy : Perform operations over groups. + pandas.core.resample.Resampler : Perform operations over resampled bins. + pandas.core.window.Rolling : Perform operations over rolling window. + pandas.core.window.Expanding : Perform operations over expanding window. + pandas.core.window.EWM : Perform operations over exponentially weighted + window. """) @Appender(_agg_doc) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bfb251b0995ec..494351dd27ca5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3937,36 +3937,37 @@ def pipe(self, func, *args, **kwargs): return com._pipe(self, func, *args, **kwargs) _shared_docs['aggregate'] = (""" - Aggregate using callable, string, dict, or list of string/callables + Aggregate using one or more operations over the specified axis. 
%(versionadded)s Parameters ---------- - func : callable, string, dictionary, or list of string/callables + func : function, string, dictionary, or list of string/functions Function to use for aggregating the data. If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. For a DataFrame, can pass a dict, if the keys are DataFrame column names. - Accepted Combinations are: + Accepted combinations are: - - string function name - - function - - list of functions - - dict of column names -> functions (or list of functions) + - string function name. + - function. + - list of functions. + - dict of column names -> functions (or list of functions). - Notes - ----- - Numpy functions mean/median/prod/sum/std/var are special cased so the - default behavior is applying the function along axis=0 - (e.g., np.mean(arr_2d, axis=0)) as opposed to - mimicking the default Numpy behavior (e.g., np.mean(arr_2d)). - - `agg` is an alias for `aggregate`. Use the alias. + %(axis)s + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. Returns ------- aggregated : %(klass)s + + Notes + ----- + `agg` is an alias for `aggregate`. Use the alias. 
""") _shared_docs['transform'] = (""" @@ -4014,7 +4015,6 @@ def pipe(self, func, *args, **kwargs): -------- pandas.%(klass)s.aggregate pandas.%(klass)s.apply - """) # ---------------------------------------------------------------------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a89b8714db6a0..4352a001aa989 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3432,7 +3432,8 @@ def apply(self, func, *args, **kwargs): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='Series', - versionadded='')) + versionadded='', + axis='')) def aggregate(self, func_or_funcs, *args, **kwargs): _level = kwargs.pop('_level', None) if isinstance(func_or_funcs, compat.string_types): @@ -4611,7 +4612,8 @@ class DataFrameGroupBy(NDFrameGroupBy): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='DataFrame', - versionadded='')) + versionadded='', + axis='')) def aggregate(self, arg, *args, **kwargs): return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4f9c22ca98f1a..004d572375234 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -334,7 +334,8 @@ def plot(self, *args, **kwargs): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='DataFrame', - versionadded='')) + versionadded='', + axis='')) def aggregate(self, arg, *args, **kwargs): self._set_binner() diff --git a/pandas/core/series.py b/pandas/core/series.py index 46d1f4468b4d0..4d6bbedc51922 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -77,6 +77,10 @@ _shared_doc_kwargs = dict( axes='index', klass='Series', axes_single_arg="{0 or 'index'}", + axis=""" + axis : {0 or 'index'} + Parameter needed for compatibility with DataFrame. 
+ """, inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""", unique='np.ndarray', duplicated='Series', diff --git a/pandas/core/window.py b/pandas/core/window.py index 59cf9ad2920ca..e70a3cb5e911b 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -626,7 +626,8 @@ def f(arg, *args, **kwargs): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', - klass='Series/DataFrame')) + klass='Series/DataFrame', + axis='')) def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) if result is None: @@ -1300,7 +1301,8 @@ def _validate_freq(self): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', - klass='Series/DataFrame')) + klass='Series/DataFrame', + axis='')) def aggregate(self, arg, *args, **kwargs): return super(Rolling, self).aggregate(arg, *args, **kwargs) @@ -1566,7 +1568,8 @@ def _get_window(self, other=None): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', - klass='Series/DataFrame')) + klass='Series/DataFrame', + axis='')) def aggregate(self, arg, *args, **kwargs): return super(Expanding, self).aggregate(arg, *args, **kwargs) @@ -1869,7 +1872,8 @@ def _constructor(self): @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', - klass='Series/DataFrame')) + klass='Series/DataFrame', + axis='')) def aggregate(self, arg, *args, **kwargs): return super(EWM, self).aggregate(arg, *args, **kwargs)