diff --git a/RELEASE.rst b/RELEASE.rst
index 5fa0234041227..55fb085b7bdee 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -298,6 +298,8 @@ pandas 0.11.0
   - Fix set_index segfault when passing MultiIndex (GH3308_)
   - Ensure pickles created in py2 can be read in py3
   - Insert ellipsis in MultiIndex summary repr (GH3348_)
+  - Groupby will handle mutation of an input group's columns (and fall back
+    to non-fast apply) (GH3380_)
 
 .. _GH3294: https://github.com/pydata/pandas/issues/3294
 .. _GH622: https://github.com/pydata/pandas/issues/622
@@ -409,6 +411,7 @@ pandas 0.11.0
 .. _GH2919: https://github.com/pydata/pandas/issues/2919
 .. _GH3308: https://github.com/pydata/pandas/issues/3308
 .. _GH3311: https://github.com/pydata/pandas/issues/3311
+.. _GH3380: https://github.com/pydata/pandas/issues/3380
 
 pandas 0.10.1
 =============
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 8b3fb4c2fba0d..aef44bd91396d 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -620,7 +620,9 @@ def apply(self, f, data, axis=0, keep_internal=False):
             try:
                 values, mutated = splitter.fast_apply(f, group_keys)
                 return group_keys, values, mutated
-            except lib.InvalidApply:
+            except Exception:
+                # fast_apply failed, e.g. because f mutated its input
+                # group (GH3380); fall through to the slow path below
                 pass
 
         result_values = []
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 139a7cace83a7..4604678d58d5a 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -1491,6 +1491,30 @@ def f(group):
         for key, group in grouped:
             assert_frame_equal(result.ix[key], f(group))
 
+    def test_mutate_groups(self):
+        # GH3380: apply must give the same result whether the applied
+        # function mutates the group it is handed or works on a copy
+
+        mydf = DataFrame({
+            'cat1': ['a'] * 8 + ['b'] * 6,
+            'cat2': ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 + ['d'] * 2 + ['e'] * 2,
+            'cat3': ['g%s' % x for x in range(1, 15)],
+            'val': np.random.randint(100, size=14),
+        })
+
+        def f_copy(x):
+            x = x.copy()
+            x['rank'] = x.val.rank(method='min')
+            return x.groupby('cat2')['rank'].min()
+
+        def f_no_copy(x):
+            x['rank'] = x.val.rank(method='min')
+            return x.groupby('cat2')['rank'].min()
+
+        grpby_copy = mydf.groupby('cat1').apply(f_copy)
+        grpby_no_copy = mydf.groupby('cat1').apply(f_no_copy)
+        assert_series_equal(grpby_copy, grpby_no_copy)
+
     def test_apply_chunk_view(self):
         # Low level tinkering could be unsafe, make sure not
         df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3],