diff --git a/doc/source/release.rst b/doc/source/release.rst index 49656046129ca..7ed01e42ff7aa 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -399,6 +399,7 @@ Bug Fixes - Better error message when passing a frequency of 'MS' in ``Period`` construction (GH5332) - Bug in `Series.__unicode__` when `max_rows` is `None` and the Series has more than 1000 rows. (:issue:`6863`) - Bug in ``groupby.get_group`` where a datetlike wasn't always accepted (:issue:`5267`) +- Bug in ``groupby.get_group`` created by ``TimeGrouper`` raises ``AttributeError`` (:issue:`6914`) - Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` affects to NaT (:issue:`5546`) - Bug in arithmetic operations affecting to NaT (:issue:`6873`) - Bug in ``Series.str.extract`` where the resulting ``Series`` from a single diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c0222ad248e0c..494251ee97044 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2,6 +2,7 @@ from functools import wraps import numpy as np import datetime +import collections from pandas.compat import( zip, builtins, range, long, lrange, lzip, @@ -1556,6 +1557,17 @@ def apply(self, f, data, axis=0): return result_keys, result_values, mutated + @cache_readonly + def indices(self): + indices = collections.defaultdict(list) + + i = 0 + for label, bin in zip(self.binlabels, self.bins): + if i < bin: + indices[label] = list(range(i, bin)) + i = bin + return indices + @cache_readonly + def ngroups(self): + return len(self.binlabels) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 22d92c7b19fe1..fde9156017c4e 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -3140,6 +3140,55 @@ def test_timegrouper_with_reg_groups(self): result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])['whole_cost'].sum() assert_series_equal(result2, expected) + def test_timegrouper_get_group(self): + # GH 6914 + + df_original = DataFrame({ + 
'Buyer': 'Carl Joe Joe Carl Joe Carl'.split(), + 'Quantity': [18,3,5,1,9,3], + 'Date' : [datetime(2013,9,1,13,0), datetime(2013,9,1,13,5), + datetime(2013,10,1,20,0), datetime(2013,10,3,10,0), + datetime(2013,12,2,12,0), datetime(2013,9,2,14,0),]}) + df_reordered = df_original.sort(columns='Quantity') + + # single grouping + expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], + df_original.iloc[[4]]] + dt_list = ['2013-09-30', '2013-10-31', '2013-12-31'] + + for df in [df_original, df_reordered]: + grouped = df.groupby(pd.Grouper(freq='M', key='Date')) + for t, expected in zip(dt_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group(dt) + assert_frame_equal(result, expected) + + # multiple grouping + expected_list = [df_original.iloc[[1]], df_original.iloc[[3]], + df_original.iloc[[4]]] + g_list = [('Joe', '2013-09-30'), ('Carl', '2013-10-31'), ('Joe', '2013-12-31')] + + for df in [df_original, df_reordered]: + grouped = df.groupby(['Buyer', pd.Grouper(freq='M', key='Date')]) + for (b, t), expected in zip(g_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group((b, dt)) + assert_frame_equal(result, expected) + + # with index + df_original = df_original.set_index('Date') + df_reordered = df_original.sort(columns='Quantity') + + expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], + df_original.iloc[[4]]] + + for df in [df_original, df_reordered]: + grouped = df.groupby(pd.Grouper(freq='M')) + for t, expected in zip(dt_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group(dt) + assert_frame_equal(result, expected) + def test_cumcount(self): df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A']) g = df.groupby('A')