diff --git a/doc/source/api.rst b/doc/source/api.rst index 64f972e52d190..68721b76eed7e 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2274,6 +2274,7 @@ Function application Resampler.apply Resampler.aggregate Resampler.transform + Resampler.pipe Upsampling ~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 3f300deddebeb..735742964f3ee 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -142,6 +142,8 @@ Other Enhancements - ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` can now take a callable as their argument (:issue:`18862`) - :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`) +- ``Resampler`` objects now have a functioning :meth:`~pandas.core.resample.Resampler.pipe` method. + Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). .. _whatsnew_0230.api_breaking: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ced120fbdbe29..47b80c00da4d4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -191,6 +191,60 @@ dtype: int64 """) +_pipe_template = """\ +Apply a function ``func`` with arguments to this %(klass)s object and return +the function's result. + +%(versionadded)s + +Use ``.pipe`` when you want to improve readability by chaining together +functions that expect Series, DataFrames, GroupBy or Resampler objects. +Instead of writing + +>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) + +You can write + +>>> (df.groupby('group') +... .pipe(f) +... .pipe(g, arg1=a) +... .pipe(h, arg2=b, arg3=c)) + +which is much more readable. 
+ +Parameters +---------- +func : callable or tuple of (callable, string) + Function to apply to this %(klass)s object or, alternatively, + a ``(callable, data_keyword)`` tuple where ``data_keyword`` is a + string indicating the keyword of ``callable`` that expects the + %(klass)s object. +args : iterable, optional + positional arguments passed into ``func``. +kwargs : dict, optional + a dictionary of keyword arguments passed into ``func``. + +Returns +------- +object : the return type of ``func``. + +Notes +----- +See more `here +<http://pandas.pydata.org/pandas-docs/stable/groupby.html#piping-function-calls>`_ + +Examples +-------- +%(examples)s + +See Also +-------- +pandas.Series.pipe : Apply a function with arguments to a series +pandas.DataFrame.pipe : Apply a function with arguments to a dataframe +apply : Apply function to each group instead of to the + full %(klass)s object. +""" + _transform_template = """ Call function producing a like-indexed %(klass)s on each group and return a %(klass)s having the same indexes as the original object @@ -676,6 +730,29 @@ def __getattr__(self, attr): raise AttributeError("%r object has no attribute %r" % (type(self).__name__, attr)) + @Substitution(klass='GroupBy', + versionadded='.. versionadded:: 0.21.0', + examples="""\ +>>> df = pd.DataFrame({'A': 'a b a b'.split(), 'B': [1, 2, 3, 4]}) +>>> df + A B +0 a 1 +1 b 2 +2 a 3 +3 b 4 + +To get the difference between each group's maximum and minimum value in one +pass, you can do + +>>> df.groupby('A').pipe(lambda x: x.max() - x.min()) + B +A +a 2 +b 2""") + @Appender(_pipe_template) + def pipe(self, func, *args, **kwargs): + return _pipe(self, func, *args, **kwargs) + plot = property(GroupByPlot) def _make_wrapper(self, name): @@ -1779,54 +1856,6 @@ def tail(self, n=5): mask = self._cumcount_array(ascending=False) < n return self._selected_obj[mask] - def pipe(self, func, *args, **kwargs): - """ Apply a function with arguments to this GroupBy object, - - .. 
versionadded:: 0.21.0 - - Parameters - ---------- - func : callable or tuple of (callable, string) - Function to apply to this GroupBy object or, alternatively, a - ``(callable, data_keyword)`` tuple where ``data_keyword`` is a - string indicating the keyword of ``callable`` that expects the - GroupBy object. - args : iterable, optional - positional arguments passed into ``func``. - kwargs : dict, optional - a dictionary of keyword arguments passed into ``func``. - - Returns - ------- - object : the return type of ``func``. - - Notes - ----- - Use ``.pipe`` when chaining together functions that expect - Series, DataFrames or GroupBy objects. Instead of writing - - >>> f(g(h(df.groupby('group')), arg1=a), arg2=b, arg3=c) - - You can write - - >>> (df - ... .groupby('group') - ... .pipe(f, arg1) - ... .pipe(g, arg2) - ... .pipe(h, arg3)) - - See more `here - `_ - - See Also - -------- - pandas.Series.pipe : Apply a function with arguments to a series - pandas.DataFrame.pipe: Apply a function with arguments to a dataframe - apply : Apply function to each group instead of to the - full GroupBy object. 
- """ - return _pipe(self, func, *args, **kwargs) - GroupBy._add_numeric_operations() diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 9f5439b68558b..c2bf7cff746eb 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -8,7 +8,8 @@ from pandas.core.base import AbstractMethodError, GroupByMixin from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy, - SeriesGroupBy, groupby, PanelGroupBy) + SeriesGroupBy, groupby, PanelGroupBy, + _pipe_template) from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.core.indexes.datetimes import DatetimeIndex, date_range @@ -26,7 +27,7 @@ from pandas._libs.lib import Timestamp from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.util._decorators import Appender +from pandas.util._decorators import Appender, Substitution from pandas.core.generic import _shared_docs _shared_docs_kwargs = dict() @@ -257,6 +258,29 @@ def _assure_grouper(self): """ make sure that we are creating our binner & grouper """ self._set_binner() + @Substitution(klass='Resampler', + versionadded='.. versionadded:: 0.23.0', + examples=""" +>>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, +... 
index=pd.date_range('2012-08-02', periods=4)) +>>> df + A +2012-08-02 1 +2012-08-03 2 +2012-08-04 3 +2012-08-05 4 + +To get the difference between each 2-day period's maximum and minimum value in +one pass, you can do + +>>> df.resample('2D').pipe(lambda x: x.max() - x.min()) + A +2012-08-02 1 +2012-08-04 1""") + @Appender(_pipe_template) + def pipe(self, func, *args, **kwargs): + return super(Resampler, self).pipe(func, *args, **kwargs) + def plot(self, *args, **kwargs): # for compat with prior versions, we want to # have the warnings shown here and just have this work diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index f00fa07d868a1..38f4b8be469a5 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -235,6 +235,21 @@ def test_groupby_resample_on_api(self): result = df.groupby('key').resample('D', on='dates').mean() assert_frame_equal(result, expected) + def test_pipe(self): + # GH17905 + + # series + r = self.series.resample('H') + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_series_equal(result, expected) + + # dataframe + r = self.frame.resample('H') + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_frame_equal(result, expected) + @td.skip_if_no_mpl def test_plot_api(self): # .resample(....).plot(...)