diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 40b068547c360..cd52c2555df0e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -591,3 +591,4 @@ Bug Fixes - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) +- Bug in ``.reset_index()`` which failed for a ``MultiIndex`` when one level of the index was entirely ``NaN`` (:issue:`6322`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ebdf72a5cde9..16621d16607bd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2973,7 +2973,13 @@ def _maybe_casted_values(index, labels=None): # if we have the labels, extract the values with a mask if labels is not None: mask = labels == -1 - values = values.take(labels) + # every label is -1, so there is nothing to take: use all-NaN. + # empty labels are excluded since all() of nothing is True + if len(mask) and mask.all(): + values = np.repeat(np.nan, len(mask)) + else: + values = values.take(labels) + if mask.any(): values, changed = _maybe_upcast_putmask(values, mask, np.nan) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index e84bb6407fafc..68dce9831bd80 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -624,6 +624,33 @@ def test_reset_index_multiindex_col(self): ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) + def test_reset_index_multiindex_nan(self): + # GH6322: set_index/reset_index must round-trip a MultiIndex + # whose levels contain some NaN or are entirely NaN + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, np.nan], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) +
+ df = pd.DataFrame({'A': [np.nan, 'b', 'c'], + 'B': [0, 1, 2], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, 2], + 'C': [np.nan, 1.1, 2.2]}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + + df = pd.DataFrame({'A': ['a', 'b', 'c'], + 'B': [np.nan, np.nan, np.nan], + 'C': np.random.rand(3)}) + rs = df.set_index(['A', 'B']).reset_index() + assert_frame_equal(rs, df) + def test_reset_index_with_datetimeindex_cols(self): # GH5818 #