Skip to content

ENH: Let Resampler objects have a pipe method #18940

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2274,6 +2274,7 @@ Function application
Resampler.apply
Resampler.aggregate
Resampler.transform
Resampler.pipe

Upsampling
~~~~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ Other Enhancements
- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories`
can now take a callable as their argument (:issue:`18862`)
- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`)
- ``Resampler`` objects now have a functioning :meth:`~pandas.core.resample.Resampler.pipe` method.
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).

.. _whatsnew_0230.api_breaking:

Expand Down
125 changes: 77 additions & 48 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,60 @@
dtype: int64
""")

_pipe_template = """\
Apply a function ``func`` with arguments to this %(klass)s object and return
the function's result.

%(versionadded)s

Use ``.pipe`` when you want to improve readability by chaining together
functions that expect Series, DataFrames, GroupBy or Resampler objects.
Instead of writing

>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)

You can write

>>> (df.groupby('group')
... .pipe(f)
... .pipe(g, arg1=a)
... .pipe(h, arg2=b, arg3=c))

which is much more readable.

Parameters
----------
func : callable or tuple of (callable, string)
Function to apply to this %(klass)s object or, alternatively,
a ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
string indicating the keyword of ``callable`` that expects the
%(klass)s object.
args : iterable, optional
positional arguments passed into ``func``.
kwargs : dict, optional
a dictionary of keyword arguments passed into ``func``.

Returns
-------
object : the return type of ``func``.

Notes
-----
See more `here
<http://pandas.pydata.org/pandas-docs/stable/groupby.html#pipe>`_

Examples
--------
%(examples)s

See Also
--------
pandas.Series.pipe : Apply a function with arguments to a series
pandas.DataFrame.pipe: Apply a function with arguments to a dataframe
apply : Apply function to each group instead of to the
full %(klass)s object.
"""

_transform_template = """
Call function producing a like-indexed %(klass)s on each group and
return a %(klass)s having the same indexes as the original object
Expand Down Expand Up @@ -676,6 +730,29 @@ def __getattr__(self, attr):
raise AttributeError("%r object has no attribute %r" %
(type(self).__name__, attr))

@Substitution(klass='GroupBy',
              versionadded='.. versionadded:: 0.21.0',
              examples="""\
>>> df = pd.DataFrame({'A': 'a b a b'.split(), 'B': [1, 2, 3, 4]})
>>> df
   A  B
0  a  1
1  b  2
2  a  3
3  b  4

To get the difference between each group's maximum and minimum value in one
pass, you can do

>>> df.groupby('A').pipe(lambda x: x.max() - x.min())
   B
A
a  2
b  2""")
@Appender(_pipe_template)
def pipe(self, func, *args, **kwargs):
    # No inline docstring on purpose: the decorators above attach the shared
    # ``_pipe_template`` text with the GroupBy-specific values filled in.
    # The call itself is forwarded unchanged to the ``_pipe`` helper.
    return _pipe(self, func, *args, **kwargs)

plot = property(GroupByPlot)

def _make_wrapper(self, name):
Expand Down Expand Up @@ -1779,54 +1856,6 @@ def tail(self, n=5):
mask = self._cumcount_array(ascending=False) < n
return self._selected_obj[mask]

def pipe(self, func, *args, **kwargs):
    """
    Apply a function with arguments to this GroupBy object.

    .. versionadded:: 0.21.0

    Parameters
    ----------
    func : callable or tuple of (callable, string)
        Function to apply to this GroupBy object or, alternatively, a
        ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
        string indicating the keyword of ``callable`` that expects the
        GroupBy object.
    args : iterable, optional
        positional arguments passed into ``func``.
    kwargs : dict, optional
        a dictionary of keyword arguments passed into ``func``.

    Returns
    -------
    object : the return type of ``func``.

    Notes
    -----
    Use ``.pipe`` when chaining together functions that expect
    Series, DataFrames or GroupBy objects. Instead of writing

    >>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)

    You can write

    >>> (df
    ...    .groupby('group')
    ...    .pipe(f)
    ...    .pipe(g, arg1=a)
    ...    .pipe(h, arg2=b, arg3=c))

    The innermost call of the nested form is the first ``.pipe`` of the
    chained form, and keyword arguments stay attached to the same function.

    See more `here
    <http://pandas.pydata.org/pandas-docs/stable/groupby.html#pipe>`_

    See Also
    --------
    pandas.Series.pipe : Apply a function with arguments to a series
    pandas.DataFrame.pipe : Apply a function with arguments to a dataframe
    apply : Apply function to each group instead of to the
        full GroupBy object.
    """
    return _pipe(self, func, *args, **kwargs)


GroupBy._add_numeric_operations()

Expand Down
28 changes: 26 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from pandas.core.base import AbstractMethodError, GroupByMixin

from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy,
SeriesGroupBy, groupby, PanelGroupBy)
SeriesGroupBy, groupby, PanelGroupBy,
_pipe_template)

from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod
from pandas.core.indexes.datetimes import DatetimeIndex, date_range
Expand All @@ -26,7 +27,7 @@
from pandas._libs.lib import Timestamp
from pandas._libs.tslibs.period import IncompatibleFrequency

from pandas.util._decorators import Appender
from pandas.util._decorators import Appender, Substitution
from pandas.core.generic import _shared_docs
_shared_docs_kwargs = dict()

Expand Down Expand Up @@ -257,6 +258,29 @@ def _assure_grouper(self):
""" make sure that we are creating our binner & grouper """
self._set_binner()

@Substitution(klass='Resampler',
              versionadded='.. versionadded:: 0.23.0',
              examples="""\
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
...                   index=pd.date_range('2012-08-02', periods=4))
>>> df
            A
2012-08-02  1
2012-08-03  2
2012-08-04  3
2012-08-05  4

To get the difference between each 2-day period's maximum and minimum value in
one pass, you can do

>>> df.resample('2D').pipe(lambda x: x.max() - x.min())
            A
2012-08-02  1
2012-08-04  1""")
@Appender(_pipe_template)
def pipe(self, func, *args, **kwargs):
    # No inline docstring on purpose: the decorators above attach the shared
    # ``_pipe_template`` text with the Resampler-specific values filled in.
    # Defining ``pipe`` explicitly on Resampler delegates to the parent
    # class implementation via ``super``.
    return super(Resampler, self).pipe(func, *args, **kwargs)

def plot(self, *args, **kwargs):
# for compat with prior versions, we want to
# have the warnings shown here and just have this work
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,21 @@ def test_groupby_resample_on_api(self):
result = df.groupby('key').resample('D', on='dates').mean()
assert_frame_equal(result, expected)

def test_pipe(self):
    # GH17905
    # ``pipe`` should pass the resampler object itself to the callable, so
    # the piped result must match the same expression written out directly.
    def max_minus_mean(resampled):
        return resampled.max() - resampled.mean()

    cases = (('series', tm.assert_series_equal),
             ('frame', tm.assert_frame_equal))
    for fixture_name, assert_equal in cases:
        resampler = getattr(self, fixture_name).resample('H')
        expected = resampler.max() - resampler.mean()
        result = resampler.pipe(max_minus_mean)
        assert_equal(result, expected)

@td.skip_if_no_mpl
def test_plot_api(self):
# .resample(....).plot(...)
Expand Down