Closed
Description
Resampling has been fixed, so this problem now only occurs when grouping with 2 or more groupers (see #6516).
In [9]:
df = DataFrame({
'date' : pd.to_datetime([
'20121002','20121007','20130130','20130202','20130305','20121002',
'20121207','20130130','20130202','20130305','20130202','20130305']),
'user_id' : [1,1,1,1,1,3,3,3,5,5,5,5],
'whole_cost' : [1790,364,280,259,201,623,90,312,359,301,359,801],
'cost1' : [12,15,10,24,39,1,0,90,45,34,1,12] }).set_index('date')
expected = df.groupby('user_id')['whole_cost'].resample(
'M', how='sum').dropna().reorder_levels(['date','user_id']).sortlevel().astype('int64')
expected.name = 'whole_cost'
In [10]: expected
Out[10]:
date        user_id
2012-10-31  1          2154
            3           623
2012-12-31  3            90
2013-01-31  1           280
            3           312
2013-02-28  1           259
            5           718
2013-03-31  1           201
            5          1102
Name: whole_cost, dtype: int64
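Grouping by a monthly TimeGrouper together with 'user_id' should be equivalent to `expected`. It is when the index is sorted first (In [11]), but the same groupby on the unsorted index raises (In [13]):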
In [11]: df.sort_index().groupby([pd.TimeGrouper(freq='M'), 'user_id'])['whole_cost'].sum()
Out[11]:
date        user_id
2012-10-31  1          2154
            3           623
2012-12-31  3            90
2013-01-31  1           280
            3           312
2013-02-28  1           259
            5           718
2013-03-31  1           201
            5          1102
Name: whole_cost, dtype: int64
In [13]: df.groupby([pd.TimeGrouper(freq='M'), 'user_id'])['whole_cost'].sum()
ValueError: cannot reindex from a duplicate axis