Skip to content

Commit 1380bb8

Browse files
committed
Merge pull request #3384 from jreback/groupby_mutate
BUG: GH3380 groupby will handle mutation on a DataFrame group's columns
2 parents c301433 + 4bb5f6d commit 1380bb8

File tree

3 files changed

+30
-1
lines changed

3 files changed

+30
-1
lines changed

RELEASE.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ pandas 0.11.0
298298
- Fix set_index segfault when passing MultiIndex (GH3308_)
299299
- Ensure pickles created in py2 can be read in py3
300300
- Insert ellipsis in MultiIndex summary repr (GH3348_)
301+
- Groupby will handle mutation among an input group's columns (and fall back
302+
to non-fast apply) (GH3380_)
301303

302304
.. _GH3294: https://github.com/pydata/pandas/issues/3294
303305
.. _GH622: https://github.com/pydata/pandas/issues/622
@@ -409,6 +411,7 @@ pandas 0.11.0
409411
.. _GH2919: https://github.com/pydata/pandas/issues/2919
410412
.. _GH3308: https://github.com/pydata/pandas/issues/3308
411413
.. _GH3311: https://github.com/pydata/pandas/issues/3311
414+
.. _GH3380: https://github.com/pydata/pandas/issues/3380
412415

413416
pandas 0.10.1
414417
=============

pandas/core/groupby.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,9 @@ def apply(self, f, data, axis=0, keep_internal=False):
620620
try:
621621
values, mutated = splitter.fast_apply(f, group_keys)
622622
return group_keys, values, mutated
623-
except lib.InvalidApply:
623+
except (Exception), detail:
624+
# we detect a mutation of some kind
625+
# so take slow path
624626
pass
625627

626628
result_values = []

pandas/tests/test_groupby.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,6 +1491,30 @@ def f(group):
14911491
for key, group in grouped:
14921492
assert_frame_equal(result.ix[key], f(group))
14931493

1494+
def test_mutate_groups(self):
1495+
1496+
# GH3380
1497+
1498+
mydf = DataFrame({
1499+
'cat1' : ['a'] * 8 + ['b'] * 6,
1500+
'cat2' : ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 + ['d'] * 2 + ['e'] * 2,
1501+
'cat3' : map(lambda x: 'g%s' % x, range(1,15)),
1502+
'val' : np.random.randint(100, size=14),
1503+
})
1504+
1505+
def f_copy(x):
1506+
x = x.copy()
1507+
x['rank'] = x.val.rank(method='min')
1508+
return x.groupby('cat2')['rank'].min()
1509+
1510+
def f_no_copy(x):
1511+
x['rank'] = x.val.rank(method='min')
1512+
return x.groupby('cat2')['rank'].min()
1513+
1514+
grpby_copy = mydf.groupby('cat1').apply(f_copy)
1515+
grpby_no_copy = mydf.groupby('cat1').apply(f_no_copy)
1516+
assert_series_equal(grpby_copy,grpby_no_copy)
1517+
14941518
def test_apply_chunk_view(self):
14951519
# Low level tinkering could be unsafe, make sure not
14961520
df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3],

0 commit comments

Comments
 (0)