diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index bcbe2c6d8b104..8aa4878288507 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -297,6 +297,7 @@ Other enhancements - :meth:`DataFrame.quantile` gained a ``method`` argument that can accept ``table`` to evaluate multi-column quantiles (:issue:`43881`) - :class:`Interval` now supports checking whether one interval is contained by another interval (:issue:`46613`) - Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`) +- :meth:`DataFrame.droplevel` and :meth:`Series.droplevel` support a ``copy`` argument. If ``False``, the underlying data is not copied (:issue:`48117`) - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support a ``copy`` argument. If ``False``, the underlying data is not copied in the returned object (:issue:`47934`) - :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`) - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 88184285d3683..e3ee68ee0fa87 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -855,7 +855,9 @@ def swapaxes( @final @doc(klass=_shared_doc_kwargs["klass"]) - def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT: + def droplevel( + self: NDFrameT, level: IndexLabel, axis: Axis = 0, copy: bool_t = True + ) -> NDFrameT: """ Return {klass} with requested index / column level(s) removed. 
@@ -874,6 +876,11 @@ def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT: For `Series` this parameter is unused and defaults to 0. + copy : bool, default True + Whether to make a copy of the underlying data. + + .. versionadded:: 1.5.0 + Returns ------- {klass} @@ -916,7 +923,7 @@ def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT: """ labels = self._get_axis(axis) new_labels = labels.droplevel(level) - return self.set_axis(new_labels, axis=axis) + return self.set_axis(new_labels, axis=axis, copy=copy) def pop(self, item: Hashable) -> Series | Any: result = self[item] diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 867835ef7f0a3..4bc3559c42401 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -241,7 +241,7 @@ def __internal_pivot_table( # discard the top level if values_passed and not values_multi and table.columns.nlevels > 1: - table = table.droplevel(0, axis=1) + table = table.droplevel(0, axis=1, copy=False) if len(index) == 0 and len(columns) > 0: table = table.T diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 0270a5dd75952..047774f22a9f5 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -535,10 +535,7 @@ def _unstack_extension_series(series: Series, level, fill_value) -> DataFrame: df = series.to_frame() result = df.unstack(level=level, fill_value=fill_value) - # equiv: result.droplevel(level=0, axis=1) - # but this avoids an extra copy - result.columns = result.columns.droplevel(0) - return result + return result.droplevel(level=0, axis=1, copy=False) def stack(frame: DataFrame, level=-1, dropna: bool = True): diff --git a/pandas/tests/frame/methods/test_droplevel.py b/pandas/tests/frame/methods/test_droplevel.py index e1302d4b73f2b..3b967b63a9b91 100644 --- a/pandas/tests/frame/methods/test_droplevel.py +++ b/pandas/tests/frame/methods/test_droplevel.py @@ -34,3 +34,28 @@ def 
test_droplevel(self, frame_or_series): # test that droplevel raises ValueError on axis != 0 with pytest.raises(ValueError, match="No axis named columns"): df.droplevel(1, axis="columns") + + def test_droplevel_copy(self, frame_or_series): + cols = MultiIndex.from_tuples( + [("c", "e"), ("d", "f")], names=["level_1", "level_2"] + ) + mi = MultiIndex.from_tuples([(1, 2), (5, 6), (9, 10)], names=["a", "b"]) + df = DataFrame([[3, 4], [7, 8], [11, 12]], index=mi, columns=cols) + if frame_or_series is not DataFrame: + df = df.iloc[:, 0] + + # Check that we DID make a copy + res = df.droplevel("a", axis="index", copy=True) + if frame_or_series is DataFrame: + for i in range(df.shape[1]): + assert not tm.shares_memory(df.iloc[:, i], res.iloc[:, i]) + else: + assert not tm.shares_memory(res, df) + + # Check that we did NOT make a copy + res = df.droplevel("a", axis="index", copy=False) + if frame_or_series is DataFrame: + for i in range(df.shape[1]): + assert tm.shares_memory(df.iloc[:, i], res.iloc[:, i]) + else: + assert tm.shares_memory(res, df)