diff --git a/doc/source/release.rst b/doc/source/release.rst index ccc34a4051508..9c448aa7083c8 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -814,6 +814,7 @@ Bug Fixes - Bug in delitem on a Series (:issue:`5542`) - Bug fix in apply when using custom function and objects are not mutated (:issue:`5545`) - Bug in selecting from a non-unique index with ``loc`` (:issue:`5553`) + - Bug in groupby returning inconsistent types when the user function returns ``None`` (:issue:`5592`) pandas 0.12.0 ------------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 1d5691edb6313..98c17e4f424f5 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2122,11 +2122,23 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): else: key_index = Index(keys, name=key_names[0]) - if isinstance(values[0], (np.ndarray, Series)): - if isinstance(values[0], Series): + + # make Nones an empty object + if com._count_not_none(*values) != len(values): + v = None + for v in values: + if v is not None: + break + if v is None: + return DataFrame() + values = [ x if x is not None else v._constructor(**v._construct_axes_dict()) for x in values ] + + v = values[0] + + if isinstance(v, (np.ndarray, Series)): + if isinstance(v, Series): applied_index = self.obj._get_axis(self.axis) - all_indexed_same = _all_indexes_same([x.index - for x in values]) + all_indexed_same = _all_indexes_same([x.index for x in values ]) singular_series = (len(values) == 1 and applied_index.nlevels == 1) @@ -2165,13 +2177,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): stacked_values = np.vstack([np.asarray(x) for x in values]) - columns = values[0].index + columns = v.index index = key_index else: stacked_values = np.vstack([np.asarray(x) for x in values]).T - index = values[0].index + index = v.index columns = key_index except (ValueError, AttributeError): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py 
index 1ee7268c0ca82..51f608e20c738 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -7,7 +7,7 @@ from datetime import datetime from numpy import nan -from pandas import bdate_range, Timestamp +from pandas import date_range,bdate_range, Timestamp from pandas.core.index import Index, MultiIndex, Int64Index from pandas.core.common import rands from pandas.core.api import Categorical, DataFrame @@ -259,7 +259,7 @@ def test_groupby_bounds_check(self): def test_groupby_grouper_f_sanity_checked(self): import pandas as pd - dates = pd.date_range('01-Jan-2013', periods=12, freq='MS') + dates = date_range('01-Jan-2013', periods=12, freq='MS') ts = pd.TimeSeries(np.random.randn(12), index=dates) # GH3035 @@ -320,6 +320,34 @@ def func(dataf): result = df.groupby('X',squeeze=False).count() tm.assert_isinstance(result,DataFrame) + # GH5592 + # inconsistent return type + df = DataFrame(dict(A = [ 'Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', 'Pony', 'Pony' ], + B = np.arange(7))) + def f(grp): + return grp.iloc[0] + expected = df.groupby('A').first() + result = df.groupby('A').apply(f)[['B']] + assert_frame_equal(result,expected) + + def f(grp): + if grp.name == 'Tiger': + return None + return grp.iloc[0] + result = df.groupby('A').apply(f)[['B']] + e = expected.copy() + e.loc['Tiger'] = np.nan + assert_frame_equal(result,e) + + def f(grp): + if grp.name == 'Pony': + return None + return grp.iloc[0] + result = df.groupby('A').apply(f)[['B']] + e = expected.copy() + e.loc['Pony'] = np.nan + assert_frame_equal(result,e) + def test_agg_regression1(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.agg(np.mean)