Skip to content

Commit 9aaf67c

Browse files
committed
BUG: Bug in groupby transform with a datetime-like grouper (GH5712)
1 parent 986bda2 commit 9aaf67c

File tree

3 files changed

+20
-4
lines changed

3 files changed

+20
-4
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -823,6 +823,7 @@ Bug Fixes
823823
- Work around regression in numpy 1.7.0 which erroneously raises IndexError from ``ndarray.item`` (:issue:`5666`)
824824
- Bug in repeated indexing of object with resultant non-unique index (:issue:`5678`)
825825
- Bug in fillna with Series and a passed series/dict (:issue:`5703`)
826+
- Bug in groupby transform with a datetime-like grouper (:issue:`5712`)
826827

827828
pandas 0.12.0
828829
-------------

pandas/core/groupby.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
notnull, _DATELIKE_DTYPES)
2323

2424
import pandas.lib as lib
25+
from pandas.lib import Timestamp
2526
import pandas.algos as _algos
2627
import pandas.hashtable as _hash
2728

@@ -257,6 +258,12 @@ def indices(self):
257258
""" dict {group name -> group indices} """
258259
return self.grouper.indices
259260

261+
def _get_index(self, name):
262+
""" safe get index """
263+
if isinstance(name, Timestamp):
264+
name = name.value
265+
return self.indices[name]
266+
260267
@property
261268
def name(self):
262269
if self._selection is None:
@@ -350,7 +357,7 @@ def get_group(self, name, obj=None):
350357
if obj is None:
351358
obj = self.obj
352359

353-
inds = self.indices[name]
360+
inds = self._get_index(name)
354361
return obj.take(inds, axis=self.axis, convert=False)
355362

356363
def __iter__(self):
@@ -1821,7 +1828,7 @@ def transform(self, func, *args, **kwargs):
18211828
# need to do a safe put here, as the dtype may be different
18221829
# this needs to be an ndarray
18231830
result = Series(result)
1824-
result.iloc[self.indices[name]] = res
1831+
result.iloc[self._get_index(name)] = res
18251832
result = result.values
18261833

18271834
# downcast if we can (and need)
@@ -1860,7 +1867,7 @@ def true_and_notnull(x, *args, **kwargs):
18601867
return b and notnull(b)
18611868

18621869
try:
1863-
indices = [self.indices[name] if true_and_notnull(group) else []
1870+
indices = [self._get_index(name) if true_and_notnull(group) else []
18641871
for name, group in self]
18651872
except ValueError:
18661873
raise TypeError("the filter must return a boolean result")
@@ -2412,7 +2419,7 @@ def filter(self, func, dropna=True, *args, **kwargs):
24122419
res = path(group)
24132420

24142421
def add_indices():
2415-
indices.append(self.indices[name])
2422+
indices.append(self._get_index(name))
24162423

24172424
# interpret the result of the filter
24182425
if isinstance(res, (bool, np.bool_)):

pandas/tests/test_groupby.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -627,6 +627,14 @@ def test_transform_broadcast(self):
627627
for idx in gp.index:
628628
assert_fp_equal(res.xs(idx), agged[idx])
629629

630+
def test_transform_bug(self):
631+
# GH 5712
632+
# transforming on a datetime column
633+
df = DataFrame(dict(A = Timestamp('20130101'), B = np.arange(5)))
634+
result = df.groupby('A')['B'].transform(lambda x: x.rank(ascending=False))
635+
expected = Series(np.arange(5,0,step=-1),name='B')
636+
assert_series_equal(result,expected)
637+
630638
def test_transform_multiple(self):
631639
grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month])
632640

0 commit comments

Comments
 (0)