Skip to content

Commit d5ef4eb

Browse files
committed
Merge pull request #5593 from jreback/groupby_apply
BUG: Bug in groupby returning non-consistent types when user function returns a None (GH5592)
2 parents b1485a3 + 9aae1a8 commit d5ef4eb

File tree

3 files changed

+49
-8
lines changed

3 files changed

+49
-8
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,7 @@ Bug Fixes
814814
- Bug in delitem on a Series (:issue:`5542`)
815815
- Bug fix in apply when using custom function and objects are not mutated (:issue:`5545`)
816816
- Bug in selecting from a non-unique index with ``loc`` (:issue:`5553`)
817+
- Bug in groupby returning non-consistent types when user function returns a ``None`` (:issue:`5592`)
817818

818819
pandas 0.12.0
819820
-------------

pandas/core/groupby.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2122,11 +2122,23 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
21222122
else:
21232123
key_index = Index(keys, name=key_names[0])
21242124

2125-
if isinstance(values[0], (np.ndarray, Series)):
2126-
if isinstance(values[0], Series):
2125+
2126+
# replace each None result with an empty object built like the first non-None result
2127+
if com._count_not_none(*values) != len(values):
2128+
v = None
2129+
for v in values:
2130+
if v is not None:
2131+
break
2132+
if v is None:
2133+
return DataFrame()
2134+
values = [ x if x is not None else v._constructor(**v._construct_axes_dict()) for x in values ]
2135+
2136+
v = values[0]
2137+
2138+
if isinstance(v, (np.ndarray, Series)):
2139+
if isinstance(v, Series):
21272140
applied_index = self.obj._get_axis(self.axis)
2128-
all_indexed_same = _all_indexes_same([x.index
2129-
for x in values])
2141+
all_indexed_same = _all_indexes_same([x.index for x in values ])
21302142
singular_series = (len(values) == 1 and
21312143
applied_index.nlevels == 1)
21322144

@@ -2165,13 +2177,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
21652177

21662178
stacked_values = np.vstack([np.asarray(x)
21672179
for x in values])
2168-
columns = values[0].index
2180+
columns = v.index
21692181
index = key_index
21702182
else:
21712183
stacked_values = np.vstack([np.asarray(x)
21722184
for x in values]).T
21732185

2174-
index = values[0].index
2186+
index = v.index
21752187
columns = key_index
21762188

21772189
except (ValueError, AttributeError):

pandas/tests/test_groupby.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from datetime import datetime
88
from numpy import nan
99

10-
from pandas import bdate_range, Timestamp
10+
from pandas import date_range,bdate_range, Timestamp
1111
from pandas.core.index import Index, MultiIndex, Int64Index
1212
from pandas.core.common import rands
1313
from pandas.core.api import Categorical, DataFrame
@@ -259,7 +259,7 @@ def test_groupby_bounds_check(self):
259259

260260
def test_groupby_grouper_f_sanity_checked(self):
261261
import pandas as pd
262-
dates = pd.date_range('01-Jan-2013', periods=12, freq='MS')
262+
dates = date_range('01-Jan-2013', periods=12, freq='MS')
263263
ts = pd.TimeSeries(np.random.randn(12), index=dates)
264264

265265
# GH3035
@@ -320,6 +320,34 @@ def func(dataf):
320320
result = df.groupby('X',squeeze=False).count()
321321
tm.assert_isinstance(result,DataFrame)
322322

323+
# GH5592
324+
# inconsistent return type
325+
df = DataFrame(dict(A = [ 'Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', 'Pony', 'Pony' ],
326+
B = np.arange(7)))
327+
def f(grp):
328+
return grp.iloc[0]
329+
expected = df.groupby('A').first()
330+
result = df.groupby('A').apply(f)[['B']]
331+
assert_frame_equal(result,expected)
332+
333+
def f(grp):
334+
if grp.name == 'Tiger':
335+
return None
336+
return grp.iloc[0]
337+
result = df.groupby('A').apply(f)[['B']]
338+
e = expected.copy()
339+
e.loc['Tiger'] = np.nan
340+
assert_frame_equal(result,e)
341+
342+
def f(grp):
343+
if grp.name == 'Pony':
344+
return None
345+
return grp.iloc[0]
346+
result = df.groupby('A').apply(f)[['B']]
347+
e = expected.copy()
348+
e.loc['Pony'] = np.nan
349+
assert_frame_equal(result,e)
350+
323351
def test_agg_regression1(self):
324352
grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
325353
result = grouped.agg(np.mean)

0 commit comments

Comments
 (0)