diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 89b79d2e04194..e193213db12a9 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -65,6 +65,10 @@ Copy-on-Write improvements - The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary of Index objects and specifying ``copy=False``, will now use a lazy copy of those Index objects for the columns of the DataFrame (:issue:`52947`) +- A shallow copy of a Series or DataFrame (``df.copy(deep=False)``) will now also return + a shallow copy of the rows/columns ``Index`` objects instead of only a shallow copy of + the data, i.e. the index of the result is no longer identical + (``df.copy(deep=False).index is df.index`` is no longer True) (:issue:`53721`) - :meth:`DataFrame.head` and :meth:`DataFrame.tail` will now return deep copies (:issue:`54011`) - Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2146d1f2cef16..05577fb971061 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -547,7 +547,10 @@ def copy_func(ax): new_axes = [copy_func(ax) for ax in self.axes] else: - new_axes = list(self.axes) + if using_copy_on_write(): + new_axes = [ax.view() for ax in self.axes] + else: + new_axes = list(self.axes) res = self.apply("copy", deep=deep) res.axes = new_axes diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index f5dc8d7ee0f80..dbdd832f34aa4 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -22,6 +22,12 @@ def test_copy(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_copy = df.copy() + # the deep copy by default takes a shallow copy of the Index + assert df_copy.index is not df.index + assert df_copy.columns is not df.columns + assert
df_copy.index.is_(df.index) + assert df_copy.columns.is_(df.columns) + # the deep copy doesn't share memory assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) if using_copy_on_write: @@ -37,6 +43,16 @@ def test_copy_shallow(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_copy = df.copy(deep=False) + # the shallow copy also makes a shallow copy of the index + if using_copy_on_write: + assert df_copy.index is not df.index + assert df_copy.columns is not df.columns + assert df_copy.index.is_(df.index) + assert df_copy.columns.is_(df.columns) + else: + assert df_copy.index is df.index + assert df_copy.columns is df.columns + # the shallow copy still shares memory assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) if using_copy_on_write: diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 7f38def847c45..0858e33a989b7 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -607,7 +607,7 @@ def test_reindex_sparse(self): ) tm.assert_frame_equal(result, expected) - def test_reindex(self, float_frame): + def test_reindex(self, float_frame, using_copy_on_write): datetime_series = tm.makeTimeSeries(nper=30) newFrame = float_frame.reindex(datetime_series.index) @@ -647,7 +647,10 @@ def test_reindex(self, float_frame): # Same index, copies values but not index if copy=False newFrame = float_frame.reindex(float_frame.index, copy=False) - assert newFrame.index is float_frame.index + if using_copy_on_write: + assert newFrame.index.is_(float_frame.index) + else: + assert newFrame.index is float_frame.index # length zero newFrame = float_frame.reindex([]) diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index 6f0e3522c3a66..afb53bf2de93a 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -22,8 +22,8 @@ def 
test_np_sqrt(self, float_frame): with np.errstate(all="ignore"): result = np.sqrt(float_frame) assert isinstance(result, type(float_frame)) - assert result.index is float_frame.index - assert result.columns is float_frame.columns + assert result.index.is_(float_frame.index) + assert result.columns.is_(float_frame.columns) tm.assert_frame_equal(result, float_frame.apply(np.sqrt)) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index f113d1d52bd6c..f835bb953ce6c 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -114,14 +114,14 @@ def test_concat_copy_index_frame(self, axis, using_copy_on_write): df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) comb = concat([df, df], axis=axis, copy=True) if not using_copy_on_write: - assert comb.index is not df.index - assert comb.columns is not df.columns + assert not comb.index.is_(df.index) + assert not comb.columns.is_(df.columns) elif axis in [0, "index"]: - assert comb.index is not df.index - assert comb.columns is df.columns + assert not comb.index.is_(df.index) + assert comb.columns.is_(df.columns) elif axis in [1, "columns"]: - assert comb.index is df.index - assert comb.columns is not df.columns + assert comb.index.is_(df.index) + assert not comb.columns.is_(df.columns) def test_default_index(self): # is_series and ignore_index diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index 82261be61f9d1..e1b3dd4888ef5 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -127,16 +127,18 @@ def test_align_nocopy(datetime_series, using_copy_on_write): def test_align_same_index(datetime_series, using_copy_on_write): a, b = datetime_series.align(datetime_series, copy=False) - assert a.index is datetime_series.index - assert b.index is datetime_series.index - - a, b = datetime_series.align(datetime_series, copy=True) if not 
using_copy_on_write: - assert a.index is not datetime_series.index - assert b.index is not datetime_series.index - else: assert a.index is datetime_series.index assert b.index is datetime_series.index + else: + assert a.index.is_(datetime_series.index) + assert b.index.is_(datetime_series.index) + + a, b = datetime_series.align(datetime_series, copy=True) + assert a.index is not datetime_series.index + assert b.index is not datetime_series.index + assert a.index.is_(datetime_series.index) + assert b.index.is_(datetime_series.index) def test_align_multiindex(): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 717dfcebfd1fd..0cdd8dfb62bec 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -628,12 +628,15 @@ def test_constructor_maskedarray_hardened(self): expected = Series([np.nan, np.nan, np.nan]) tm.assert_series_equal(result, expected) - def test_series_ctor_plus_datetimeindex(self): + def test_series_ctor_plus_datetimeindex(self, using_copy_on_write): rng = date_range("20090415", "20090519", freq="B") data = {k: 1 for k in rng} result = Series(data, index=rng) - assert result.index is rng + if using_copy_on_write: + assert result.index.is_(rng) + else: + assert result.index is rng def test_constructor_default_index(self): s = Series([0, 1, 2]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 453e34abacfd2..6644ec82fab17 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -46,20 +46,26 @@ def test_reindex(self, multiindex_dataframe_random_data): tm.assert_frame_equal(reindexed, expected) def test_reindex_preserve_levels( - self, multiindex_year_month_day_dataframe_random_data + self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write ): ymd = multiindex_year_month_day_dataframe_random_data new_index = ymd.index[::10] chunk = ymd.reindex(new_index) - assert chunk.index is 
new_index + if using_copy_on_write: + assert chunk.index.is_(new_index) + else: + assert chunk.index is new_index chunk = ymd.loc[new_index] assert chunk.index.equals(new_index) ymdT = ymd.T chunk = ymdT.reindex(columns=new_index) - assert chunk.columns is new_index + if using_copy_on_write: + assert chunk.columns.is_(new_index) + else: + assert chunk.columns is new_index chunk = ymdT.loc[:, new_index] assert chunk.columns.equals(new_index)