diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index b23da6ee4b806..da712f84eb1b5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -394,6 +394,7 @@ Reshaping - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`) - Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`) +- Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if the ``dropna`` argument is ``False``, when the ``aggfunc`` argument is a ``list`` (:issue:`22159`) - Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`). - Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`) - Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 1fa69b9b5021a..580810f3b6829 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -34,7 +34,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = pivot_table(data, values=values, index=index, columns=columns, fill_value=fill_value, aggfunc=func, - margins=margins, margins_name=margins_name) + margins=margins, dropna=dropna, + margins_name=margins_name) pieces.append(table) keys.append(getattr(func, '__name__', func)) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 24475cc0c8fb3..1ee2ebf5de34e 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1288,6 +1288,55 @@ def
test_pivot_number_of_levels_larger_than_int32(self): df.pivot_table(index='ind1', columns='ind2', values='count', aggfunc='count') + def test_pivot_table_aggfunc_dropna(self, dropna): + # GH 22159: list aggfunc; all-NaN ret_none columns survive unless dropna + df = pd.DataFrame({'fruit': ['apple', 'peach', 'apple'], + 'size': [1, 1, 2], + 'taste': [7, 6, 6]}) + + def ret_one(x): + return 1 + + def ret_sum(x): + return sum(x) + + def ret_none(x): + return np.nan + + result = pd.pivot_table(df, columns='fruit', + aggfunc=[ret_sum, ret_none, ret_one], + dropna=dropna) + + data = [[3, 1, np.nan, np.nan, 1, 1], [13, 6, np.nan, np.nan, 1, 1]] + col = pd.MultiIndex.from_product([['ret_sum', 'ret_none', 'ret_one'], + ['apple', 'peach']], + names=[None, 'fruit']) + expected = pd.DataFrame(data, index=['size', 'taste'], columns=col) + + if dropna: + expected = expected.dropna(axis='columns') + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_aggfunc_scalar_dropna(self, dropna): + # GH 22159: scalar aggfunc; all-NaN column 'two' survives unless dropna + df = pd.DataFrame({'A': ['one', 'two', 'one'], + 'x': [3, np.nan, 2], + 'y': [1, np.nan, np.nan]}) + + result = pd.pivot_table(df, columns='A', + aggfunc=np.mean, + dropna=dropna) + + data = [[2.5, np.nan], [1, np.nan]] + col = pd.Index(['one', 'two'], name='A') + expected = pd.DataFrame(data, index=['x', 'y'], columns=col) + + if dropna: + expected = expected.dropna(axis='columns') + + tm.assert_frame_equal(result, expected) + class TestCrosstab(object):