Skip to content

Commit 312f777

Browse files
committed
Merge pull request #5554 from jreback/apply_bug
BUG: Bug fix in apply when using custom function and objects are not mutated (GH5545)
2 parents e5e53ba + 4d45e3f commit 312f777

File tree

4 files changed

+39
-10
lines changed

4 files changed

+39
-10
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,7 @@ Bug Fixes
812812
length to the indexer (:issue:`5508`)
813813
- Bug in getitem with a multi-index and ``iloc`` (:issue:`5528`)
814814
- Bug in delitem on a Series (:issue:`5542`)
815+
- Bug in apply when using a custom function and objects are not mutated (:issue:`5545`)
815816

816817
pandas 0.12.0
817818
-------------

pandas/core/groupby.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -543,13 +543,13 @@ def head(self, n=5):
543543
544544
>>> df = DataFrame([[1, 2], [1, 4], [5, 6]],
545545
columns=['A', 'B'])
546-
>>> df.groupby('A', as_index=False).head(1)
546+
>>> df.groupby('A', as_index=False).head(1)
547547
A B
548548
0 1 2
549549
2 5 6
550550
>>> df.groupby('A').head(1)
551551
A B
552-
A
552+
A
553553
1 0 1 2
554554
5 2 5 6
555555
@@ -572,16 +572,16 @@ def tail(self, n=5):
572572
573573
>>> df = DataFrame([[1, 2], [1, 4], [5, 6]],
574574
columns=['A', 'B'])
575-
>>> df.groupby('A', as_index=False).tail(1)
575+
>>> df.groupby('A', as_index=False).tail(1)
576576
A B
577577
1 1 4
578578
2 5 6
579579
>>> df.groupby('A').tail(1)
580580
A B
581-
A
581+
A
582582
1 1 1 4
583583
5 2 5 6
584-
584+
585585
"""
586586
rng = np.arange(0, -self.grouper._max_groupsize, -1, dtype='int64')
587587
in_tail = self._cumcount_array(rng, ascending=False) > -n
@@ -2149,6 +2149,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
21492149
keys, values, not_indexed_same=not_indexed_same
21502150
)
21512151

2152+
# still a series
2153+
# path added as of GH 5545
2154+
elif all_indexed_same:
2155+
from pandas.tools.merge import concat
2156+
return concat(values)
2157+
21522158
if not all_indexed_same:
21532159
return self._concat_objects(
21542160
keys, values, not_indexed_same=not_indexed_same

pandas/src/reduce.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ def apply_frame_axis0(object frame, object f, object names,
541541
# I'm paying the price for index-sharing, ugh
542542
try:
543543
if piece.index is slider.dummy.index:
544-
piece.index = piece.index.copy()
544+
piece = piece.copy()
545545
else:
546546
mutated = True
547547
except AttributeError:

pandas/tests/test_groupby.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,7 +1214,7 @@ def test_groupby_as_index_apply(self):
12141214
res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index
12151215

12161216
# apply doesn't maintain the original ordering
1217-
exp_not_as_apply = Index([0, 2, 1, 4])
1217+
exp_not_as_apply = Index([0, 2, 1, 4])
12181218
exp_as_apply = MultiIndex.from_tuples([(1, 0), (1, 2), (2, 1), (3, 4)])
12191219

12201220
assert_index_equal(res_as_apply, exp_as_apply)
@@ -1845,6 +1845,28 @@ def test_apply_corner(self):
18451845
expected = self.tsframe * 2
18461846
assert_frame_equal(result, expected)
18471847

1848+
def test_apply_without_copy(self):
1849+
# GH 5545
1850+
# returning a non-copy in an applied function fails
1851+
1852+
data = DataFrame({'id_field' : [100, 100, 200, 300], 'category' : ['a','b','c','c'], 'value' : [1,2,3,4]})
1853+
1854+
def filt1(x):
1855+
if x.shape[0] == 1:
1856+
return x.copy()
1857+
else:
1858+
return x[x.category == 'c']
1859+
1860+
def filt2(x):
1861+
if x.shape[0] == 1:
1862+
return x
1863+
else:
1864+
return x[x.category == 'c']
1865+
1866+
expected = data.groupby('id_field').apply(filt1)
1867+
result = data.groupby('id_field').apply(filt2)
1868+
assert_frame_equal(result,expected)
1869+
18481870
def test_apply_use_categorical_name(self):
18491871
from pandas import qcut
18501872
cats = qcut(self.df.C, 4)
@@ -2638,7 +2660,7 @@ def test_cumcount_mi(self):
26382660
expected = Series([0, 1, 2, 0, 3], index=mi)
26392661

26402662
assert_series_equal(expected, g.cumcount())
2641-
assert_series_equal(expected, sg.cumcount())
2663+
assert_series_equal(expected, sg.cumcount())
26422664

26432665
def test_cumcount_groupby_not_col(self):
26442666
df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], index=[0] * 5)
@@ -2895,7 +2917,7 @@ def test_filter_maintains_ordering(self):
28952917
def test_filter_and_transform_with_non_unique_int_index(self):
28962918
# GH4620
28972919
index = [1, 1, 1, 2, 1, 1, 0, 1]
2898-
df = DataFrame({'pid' : [1,1,1,2,2,3,3,3],
2920+
df = DataFrame({'pid' : [1,1,1,2,2,3,3,3],
28992921
'tag' : [23,45,62,24,45,34,25,62]}, index=index)
29002922
grouped_df = df.groupby('tag')
29012923
ser = df['pid']
@@ -2923,7 +2945,7 @@ def test_filter_and_transform_with_non_unique_int_index(self):
29232945
# ^ made manually because this can get confusing!
29242946
assert_series_equal(actual, expected)
29252947

2926-
# Transform Series
2948+
# Transform Series
29272949
actual = grouped_ser.transform(len)
29282950
expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index)
29292951
assert_series_equal(actual, expected)

0 commit comments

Comments
 (0)