Skip to content

Commit 782f438

Browse files
API / CoW: shallow copy of DataFrame/Series (.copy(deep=False)) also returns shallow copy of the index/columns (#53722)
* API / CoW: shallow copy of DataFrame/Series (.copy(deep=False)) also returns shallow copy of the index/columns
* add whatsnew

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
1 parent 0801953 commit 782f438

File tree

9 files changed

+60
-23
lines changed

9 files changed

+60
-23
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ Copy-on-Write improvements
6565
- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary
6666
of Index objects and specifying ``copy=False``, will now use a lazy copy
6767
of those Index objects for the columns of the DataFrame (:issue:`52947`)
68+
- A shallow copy of a Series or DataFrame (``df.copy(deep=False)``) will now also return
69+
a shallow copy of the rows/columns ``Index`` objects instead of only a shallow copy of
70+
the data, i.e. the index of the result is no longer identical
71+
(``df.copy(deep=False).index is df.index`` is no longer True) (:issue:`53721`)
6872
- :meth:`DataFrame.head` and :meth:`DataFrame.tail` will now return deep copies (:issue:`54011`)
6973
- Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`)
7074

pandas/core/internals/managers.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,10 @@ def copy_func(ax):
547547

548548
new_axes = [copy_func(ax) for ax in self.axes]
549549
else:
550-
new_axes = list(self.axes)
550+
if using_copy_on_write():
551+
new_axes = [ax.view() for ax in self.axes]
552+
else:
553+
new_axes = list(self.axes)
551554

552555
res = self.apply("copy", deep=deep)
553556
res.axes = new_axes

pandas/tests/copy_view/test_methods.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ def test_copy(using_copy_on_write):
2222
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
2323
df_copy = df.copy()
2424

25+
# the deep copy by default takes a shallow copy of the Index
26+
assert df_copy.index is not df.index
27+
assert df_copy.columns is not df.columns
28+
assert df_copy.index.is_(df.index)
29+
assert df_copy.columns.is_(df.columns)
30+
2531
# the deep copy doesn't share memory
2632
assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
2733
if using_copy_on_write:
@@ -37,6 +43,16 @@ def test_copy_shallow(using_copy_on_write):
3743
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
3844
df_copy = df.copy(deep=False)
3945

46+
# the shallow copy also makes a shallow copy of the index
47+
if using_copy_on_write:
48+
assert df_copy.index is not df.index
49+
assert df_copy.columns is not df.columns
50+
assert df_copy.index.is_(df.index)
51+
assert df_copy.columns.is_(df.columns)
52+
else:
53+
assert df_copy.index is df.index
54+
assert df_copy.columns is df.columns
55+
4056
# the shallow copy still shares memory
4157
assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
4258
if using_copy_on_write:

pandas/tests/frame/methods/test_reindex.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ def test_reindex_sparse(self):
607607
)
608608
tm.assert_frame_equal(result, expected)
609609

610-
def test_reindex(self, float_frame):
610+
def test_reindex(self, float_frame, using_copy_on_write):
611611
datetime_series = tm.makeTimeSeries(nper=30)
612612

613613
newFrame = float_frame.reindex(datetime_series.index)
@@ -647,7 +647,10 @@ def test_reindex(self, float_frame):
647647

648648
# Same index, copies values but not index if copy=False
649649
newFrame = float_frame.reindex(float_frame.index, copy=False)
650-
assert newFrame.index is float_frame.index
650+
if using_copy_on_write:
651+
assert newFrame.index.is_(float_frame.index)
652+
else:
653+
assert newFrame.index is float_frame.index
651654

652655
# length zero
653656
newFrame = float_frame.reindex([])

pandas/tests/frame/test_npfuncs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ def test_np_sqrt(self, float_frame):
2222
with np.errstate(all="ignore"):
2323
result = np.sqrt(float_frame)
2424
assert isinstance(result, type(float_frame))
25-
assert result.index is float_frame.index
26-
assert result.columns is float_frame.columns
25+
assert result.index.is_(float_frame.index)
26+
assert result.columns.is_(float_frame.columns)
2727

2828
tm.assert_frame_equal(result, float_frame.apply(np.sqrt))
2929

pandas/tests/reshape/concat/test_index.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,14 @@ def test_concat_copy_index_frame(self, axis, using_copy_on_write):
114114
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
115115
comb = concat([df, df], axis=axis, copy=True)
116116
if not using_copy_on_write:
117-
assert comb.index is not df.index
118-
assert comb.columns is not df.columns
117+
assert not comb.index.is_(df.index)
118+
assert not comb.columns.is_(df.columns)
119119
elif axis in [0, "index"]:
120-
assert comb.index is not df.index
121-
assert comb.columns is df.columns
120+
assert not comb.index.is_(df.index)
121+
assert comb.columns.is_(df.columns)
122122
elif axis in [1, "columns"]:
123-
assert comb.index is df.index
124-
assert comb.columns is not df.columns
123+
assert comb.index.is_(df.index)
124+
assert not comb.columns.is_(df.columns)
125125

126126
def test_default_index(self):
127127
# is_series and ignore_index

pandas/tests/series/methods/test_align.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,16 +127,18 @@ def test_align_nocopy(datetime_series, using_copy_on_write):
127127

128128
def test_align_same_index(datetime_series, using_copy_on_write):
129129
a, b = datetime_series.align(datetime_series, copy=False)
130-
assert a.index is datetime_series.index
131-
assert b.index is datetime_series.index
132-
133-
a, b = datetime_series.align(datetime_series, copy=True)
134130
if not using_copy_on_write:
135-
assert a.index is not datetime_series.index
136-
assert b.index is not datetime_series.index
137-
else:
138131
assert a.index is datetime_series.index
139132
assert b.index is datetime_series.index
133+
else:
134+
assert a.index.is_(datetime_series.index)
135+
assert b.index.is_(datetime_series.index)
136+
137+
a, b = datetime_series.align(datetime_series, copy=True)
138+
assert a.index is not datetime_series.index
139+
assert b.index is not datetime_series.index
140+
assert a.index.is_(datetime_series.index)
141+
assert b.index.is_(datetime_series.index)
140142

141143

142144
def test_align_multiindex():

pandas/tests/series/test_constructors.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -628,12 +628,15 @@ def test_constructor_maskedarray_hardened(self):
628628
expected = Series([np.nan, np.nan, np.nan])
629629
tm.assert_series_equal(result, expected)
630630

631-
def test_series_ctor_plus_datetimeindex(self):
631+
def test_series_ctor_plus_datetimeindex(self, using_copy_on_write):
632632
rng = date_range("20090415", "20090519", freq="B")
633633
data = {k: 1 for k in rng}
634634

635635
result = Series(data, index=rng)
636-
assert result.index is rng
636+
if using_copy_on_write:
637+
assert result.index.is_(rng)
638+
else:
639+
assert result.index is rng
637640

638641
def test_constructor_default_index(self):
639642
s = Series([0, 1, 2])

pandas/tests/test_multilevel.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,20 +46,26 @@ def test_reindex(self, multiindex_dataframe_random_data):
4646
tm.assert_frame_equal(reindexed, expected)
4747

4848
def test_reindex_preserve_levels(
49-
self, multiindex_year_month_day_dataframe_random_data
49+
self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write
5050
):
5151
ymd = multiindex_year_month_day_dataframe_random_data
5252

5353
new_index = ymd.index[::10]
5454
chunk = ymd.reindex(new_index)
55-
assert chunk.index is new_index
55+
if using_copy_on_write:
56+
assert chunk.index.is_(new_index)
57+
else:
58+
assert chunk.index is new_index
5659

5760
chunk = ymd.loc[new_index]
5861
assert chunk.index.equals(new_index)
5962

6063
ymdT = ymd.T
6164
chunk = ymdT.reindex(columns=new_index)
62-
assert chunk.columns is new_index
65+
if using_copy_on_write:
66+
assert chunk.columns.is_(new_index)
67+
else:
68+
assert chunk.columns is new_index
6369

6470
chunk = ymdT.loc[:, new_index]
6571
assert chunk.columns.equals(new_index)

0 commit comments

Comments
 (0)