Merge pull request #5554 from jreback/apply_bug

jreback · jreback · commit 312f777f034d · 2013-11-20T06:28:49.000-08:00
BUG: Fix groupby apply when a custom function returns its input objects unmutated (GH5545)
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -812,6 +812,7 @@ Bug Fixes
     length to the indexer (:issue:`5508`)
   - Bug in getitem with a multi-index and ``iloc`` (:issue:`5528`)
   - Bug in delitem on a Series (:issue:`5542`)
+  - Bug in groupby ``apply`` when a custom function returns its input objects unmutated (:issue:`5545`)
 
 pandas 0.12.0
 -------------
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -543,13 +543,13 @@ def head(self, n=5):
 
         >>> df = DataFrame([[1, 2], [1, 4], [5, 6]],
                             columns=['A', 'B'])
-        >>> df.groupby('A', as_index=False).head(1) 
+        >>> df.groupby('A', as_index=False).head(1)
            A  B
         0  1  2
         2  5  6
         >>> df.groupby('A').head(1)
              A  B
-        A        
+        A
         1 0  1  2
         5 2  5  6
 
@@ -572,16 +572,16 @@ def tail(self, n=5):
 
         >>> df = DataFrame([[1, 2], [1, 4], [5, 6]],
                             columns=['A', 'B'])
-        >>> df.groupby('A', as_index=False).tail(1) 
+        >>> df.groupby('A', as_index=False).tail(1)
            A  B
         0  1  2
         2  5  6
         >>> df.groupby('A').head(1)
              A  B
-        A        
+        A
         1 0  1  2
         5 2  5  6
-        
+
         """
         rng = np.arange(0, -self.grouper._max_groupsize, -1, dtype='int64')
         in_tail = self._cumcount_array(rng, ascending=False) > -n
@@ -2149,6 +2149,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                                 keys, values, not_indexed_same=not_indexed_same
                             )
 
+                        # still a series
+                        # path added as of GH 5545
+                        elif all_indexed_same:
+                            from pandas.tools.merge import concat
+                            return concat(values)
+
                     if not all_indexed_same:
                         return self._concat_objects(
                             keys, values, not_indexed_same=not_indexed_same
diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx
@@ -541,7 +541,7 @@ def apply_frame_axis0(object frame, object f, object names,
             # I'm paying the price for index-sharing, ugh
             try:
                 if piece.index is slider.dummy.index:
-                    piece.index = piece.index.copy()
+                    piece = piece.copy()
                 else:
                     mutated = True
             except AttributeError:
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -1214,7 +1214,7 @@ def test_groupby_as_index_apply(self):
         res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
 
         # apply doesn't maintain the original ordering
-        exp_not_as_apply = Index([0, 2, 1, 4])        
+        exp_not_as_apply = Index([0, 2, 1, 4])
         exp_as_apply = MultiIndex.from_tuples([(1, 0), (1, 2), (2, 1), (3, 4)])
 
         assert_index_equal(res_as_apply, exp_as_apply)
@@ -1845,6 +1845,28 @@ def test_apply_corner(self):
         expected = self.tsframe * 2
         assert_frame_equal(result, expected)
 
+    def test_apply_without_copy(self):
+        # GH 5545
+        # returning a non-copy in an applied function fails
+
+        data = DataFrame({'id_field' : [100, 100, 200, 300], 'category' : ['a','b','c','c'], 'value' : [1,2,3,4]})
+
+        def filt1(x):
+            if x.shape[0] == 1:
+                return x.copy()
+            else:
+                return x[x.category == 'c']
+
+        def filt2(x):
+            if x.shape[0] == 1:
+                return x
+            else:
+                return x[x.category == 'c']
+
+        expected = data.groupby('id_field').apply(filt1)
+        result = data.groupby('id_field').apply(filt2)
+        assert_frame_equal(result,expected)
+
     def test_apply_use_categorical_name(self):
         from pandas import qcut
         cats = qcut(self.df.C, 4)
@@ -2638,7 +2660,7 @@ def test_cumcount_mi(self):
         expected = Series([0, 1, 2, 0, 3], index=mi)
 
         assert_series_equal(expected, g.cumcount())
-        assert_series_equal(expected, sg.cumcount())        
+        assert_series_equal(expected, sg.cumcount())
 
     def test_cumcount_groupby_not_col(self):
         df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], index=[0] * 5)
@@ -2895,7 +2917,7 @@ def test_filter_maintains_ordering(self):
     def test_filter_and_transform_with_non_unique_int_index(self):
         # GH4620
         index = [1, 1, 1, 2, 1, 1, 0, 1]
-        df = DataFrame({'pid' : [1,1,1,2,2,3,3,3], 
+        df = DataFrame({'pid' : [1,1,1,2,2,3,3,3],
                        'tag' : [23,45,62,24,45,34,25,62]}, index=index)
         grouped_df = df.groupby('tag')
         ser = df['pid']
@@ -2923,7 +2945,7 @@ def test_filter_and_transform_with_non_unique_int_index(self):
         # ^ made manually because this can get confusing!
         assert_series_equal(actual, expected)
 
-        # Transform Series 
+        # Transform Series
         actual = grouped_ser.transform(len)
         expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index)
         assert_series_equal(actual, expected)