Skip to content

BUG: Combination of groupby.resample.interpolate() fails #35325

Closed
@clamydo

Description

@clamydo
  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • (optional) I have confirmed this bug exists on the master branch of pandas.


Code Sample, a copy-pastable example

d = {'price': [10, 11, 9, 13, 14, 18, 17, 19],
     'volume': [50, 60, 40, 100, 50, 100, 40, 50]}

df = pd.DataFrame(d)

df['week_starting'] = pd.date_range('01/01/2018', periods=8, freq='W')

df \
    .set_index("week_starting") \
    .groupby("volume") \
    .resample("1D") \
    .interpolate(method="linear")

Error:

TypeError                                 Traceback (most recent call last)
~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
    735             try:
--> 736                 result = self._python_apply_general(f)
    737             except TypeError:

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
    751     def _python_apply_general(self, f):
--> 752         keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis)
    753 

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
    205             group_axes = group.axes
--> 206             res = f(group)
    207             if not _is_indexed_like(res, group_axes):

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/resample.py in func(x)
    988 
--> 989             return x.apply(f, *args, **kwargs)
    990 

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/resample.py in aggregate(self, func, *args, **kwargs)
    284             grouper = None
--> 285             result = self._groupby_and_aggregate(how, grouper, *args, **kwargs)
    286 

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/resample.py in _groupby_and_aggregate(self, how, grouper, *args, **kwargs)
    360             else:
--> 361                 result = grouped.aggregate(how, *args, **kwargs)
    362         except DataError:

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, *args, **kwargs)
    923             # nicer error message
--> 924             raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
    925 

TypeError: Must provide 'func' or tuples of '(column, aggfunc).

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-15-5d08bff36e4d> in <module>
----> 1 df = df \
      2     .set_index("week_starting") \
      3         .groupby("volume") \
      4             .resample("1D") \
      5                 .interpolate(method="linear")

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/resample.py in interpolate(self, method, axis, limit, inplace, limit_direction, limit_area, downcast, **kwargs)
    797         Interpolate values according to different methods.
    798         """
--> 799         result = self._upsample(None)
    800         return result.interpolate(
    801             method=method,

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/resample.py in _apply(self, f, grouper, *args, **kwargs)
    989             return x.apply(f, *args, **kwargs)
    990 
--> 991         result = self._groupby.apply(func)
    992         return self._wrap_result(result)
    993 

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
    745 
    746                 with _group_selection_context(self):
--> 747                     return self._python_apply_general(f)
    748 
    749         return result

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
    750 
    751     def _python_apply_general(self, f):
--> 752         keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis)
    753 
    754         return self._wrap_applied_output(

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
    204             # group might be modified
    205             group_axes = group.axes
--> 206             res = f(group)
    207             if not _is_indexed_like(res, group_axes):
    208                 mutated = True

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/resample.py in func(x)
    987                 return getattr(x, f)(**kwargs)
    988 
--> 989             return x.apply(f, *args, **kwargs)
    990 
    991         result = self._groupby.apply(func)

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/resample.py in aggregate(self, func, *args, **kwargs)
    283             how = func
    284             grouper = None
--> 285             result = self._groupby_and_aggregate(how, grouper, *args, **kwargs)
    286 
    287         result = self._apply_loffset(result)

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/resample.py in _groupby_and_aggregate(self, how, grouper, *args, **kwargs)
    359                 result = grouped._aggregate_item_by_item(how, *args, **kwargs)
    360             else:
--> 361                 result = grouped.aggregate(how, *args, **kwargs)
    362         except DataError:
    363             # we have a non-reducing function; try to evaluate

~/.cache/pypoetry/virtualenvs/lagps-6BuXYM4Y-py3.8/lib/python3.8/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, *args, **kwargs)
    922         elif func is None:
    923             # nicer error message
--> 924             raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
    925 
    926         func = _maybe_mangle_lambdas(func)

TypeError: Must provide 'func' or tuples of '(column, aggfunc).

On master the error is raised in line 86 of the same file.

Problem description

The combination of groupby, resample, and interpolate leads to a TypeError: Must provide 'func' or tuples of '(column, aggfunc). [0].

Other functions, like ffill or bfill, work without issues.

Expected Output

Output of pd.show_versions()

INSTALLED VERSIONS

commit : None
python : 3.8.2.final.0
python-bits : 64
OS : Linux
OS-release : 4.4.0-18362-Microsoft
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8

pandas : 1.0.5
numpy : 1.19.0
pytz : 2020.1
dateutil : 2.8.1
pip : 20.0.2
setuptools : 44.0.0
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : 2.8.5 (dt dec pq3 ext lo64)
jinja2 : 2.11.2
IPython : 7.16.1
pandas_datareader: None
bs4 : None
bottleneck : 1.3.2
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.2.2
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
pytest : None
pyxlsb : None
s3fs : None
scipy : 1.5.0
sqlalchemy : 1.3.18
tables : None
tabulate : None
xarray : None
xlrd : None
xlwt : None
xlsxwriter : None
numba : None

[0] By the way, the error message is missing the closing quote after '(column, aggfunc)'.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions