diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 504f8004bc8a6..f14a08876b6e8 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -42,7 +42,7 @@ Conversion Indexing ^^^^^^^^ - +- Bug in ``DataFrame.reset_index(level=)`` where ``level`` was not normalized or validated when the index has a single level (:issue:`16263`) I/O diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e6ea58e7e05be..8d437102e4d18 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3012,12 +3012,12 @@ def _maybe_casted_values(index, labels=None): return values new_index = _default_index(len(new_obj)) - if isinstance(self.index, MultiIndex): - if level is not None: - if not isinstance(level, (tuple, list)): - level = [level] - level = [self.index._get_level_number(lev) for lev in level] - if len(level) < len(self.index.levels): + if level is not None: + if not isinstance(level, (tuple, list)): + level = [level] + level = [self.index._get_level_number(lev) for lev in level] + if isinstance(self.index, MultiIndex): + if len(level) < self.index.nlevels: new_index = self.index.droplevel(level) if not drop: @@ -3033,6 +3033,8 @@ def _maybe_casted_values(index, labels=None): multi_col = isinstance(self.columns, MultiIndex) for i, (lev, lab) in reversed(list(enumerate(to_insert))): + if not (level is None or i in level): + continue name = names[i] if multi_col: col_name = (list(name) if isinstance(name, tuple) @@ -3049,11 +3051,9 @@ def _maybe_casted_values(index, labels=None): missing = self.columns.nlevels - len(name_lst) name_lst += [col_fill] * missing name = tuple(name_lst) - # to ndarray and maybe infer different dtype level_values = _maybe_casted_values(lev, lab) - if level is None or i in level: - new_obj.insert(0, name, level_values) + new_obj.insert(0, name, level_values) new_obj.index = new_index if not inplace: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index e6313dfc602a8..fbfbcc14e9150 100644 --- 
a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -641,6 +641,43 @@ def test_reset_index(self): xp = xp.set_index(['B'], append=True) assert_frame_equal(rs, xp, check_names=False) + def test_reset_index_level(self): + df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'C', 'D']) + + for levels in ['A', 'B'], [0, 1]: + # With MultiIndex: one level (scalar or list), then all levels + result = df.set_index(['A', 'B']).reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = df.set_index(['A', 'B']).reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = df.set_index(['A', 'B']).reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(['A', 'B']).reset_index(level=levels, + drop=True) + tm.assert_frame_equal(result, df[['C', 'D']]) + + # With single-level Index (GH 16263): same scalar/list forms + result = df.set_index('A').reset_index(level=levels[0]) + tm.assert_frame_equal(result, df) + + result = df.set_index('A').reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df) + + result = df.set_index(['A']).reset_index(level=levels[0], + drop=True) + tm.assert_frame_equal(result, df[['B', 'C', 'D']]) + + # Missing levels must raise for both MultiIndex and single-level Index: + for idx_lev in ['A', 'B'], ['A']: + with tm.assert_raises_regex(KeyError, 'Level E '): + df.set_index(idx_lev).reset_index(level=['A', 'E']) + with tm.assert_raises_regex(IndexError, 'Too many levels'): + df.set_index(idx_lev).reset_index(level=[0, 1, 2]) + def test_reset_index_right_dtype(self): time = np.arange(0.0, 10, np.sqrt(2) / 2) s1 = Series((9.81 * time ** 2) / 2, diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 150767ee9e2b2..98ae749aaa10e 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -141,6 +141,45 @@ def test_reset_index(self): tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) 
assert isinstance(rs, Series) + def test_reset_index_level(self): + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], + columns=['A', 'B', 'C']) + + for levels in ['A', 'B'], [0, 1]: + # With MultiIndex: one level (scalar or list), then all levels + s = df.set_index(['A', 'B'])['C'] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index('B')) + + result = s.reset_index(level=levels) + tm.assert_frame_equal(result, df) + + # drop=True must go through Series.reset_index, not DataFrame + result = s.reset_index(level=levels, drop=True) + tm.assert_series_equal(result, df['C']) + + with tm.assert_raises_regex(KeyError, 'Level E '): + s.reset_index(level=['A', 'E']) + + # With single-level Index (GH 16263) + s = df.set_index('A')['B'] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df[['A', 'B']]) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df[['A', 'B']]) + + result = s.reset_index(level=levels[0], drop=True) + tm.assert_series_equal(result, df['B']) + + with tm.assert_raises_regex(IndexError, 'Too many levels'): + s.reset_index(level=[0, 1, 2]) + def test_reset_index_range(self): # GH 12071 s = pd.Series(range(2), name='A', dtype='int64')