Skip to content

Commit d9aefbf

Browse files
CoW: add warning mode for cases that will change behaviour
1 parent 7ec95e4 commit d9aefbf

21 files changed

+175
-68
lines changed

pandas/_config/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"option_context",
1616
"options",
1717
"using_copy_on_write",
18+
"warn_copy_on_write",
1819
]
1920
from pandas._config import config
2021
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
@@ -32,7 +33,18 @@
3233

3334
def using_copy_on_write() -> bool:
3435
_mode_options = _global_config["mode"]
35-
return _mode_options["copy_on_write"] and _mode_options["data_manager"] == "block"
36+
return (
37+
_mode_options["copy_on_write"] is True
38+
and _mode_options["data_manager"] == "block"
39+
)
40+
41+
42+
def warn_copy_on_write() -> bool:
43+
_mode_options = _global_config["mode"]
44+
return (
45+
_mode_options["copy_on_write"] == "warn"
46+
and _mode_options["data_manager"] == "block"
47+
)
3648

3749

3850
def using_nullable_dtypes() -> bool:

pandas/conftest.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1994,7 +1994,18 @@ def using_copy_on_write() -> bool:
19941994
Fixture to check if Copy-on-Write is enabled.
19951995
"""
19961996
return (
1997-
pd.options.mode.copy_on_write
1997+
pd.options.mode.copy_on_write is True
1998+
and _get_option("mode.data_manager", silent=True) == "block"
1999+
)
2000+
2001+
2002+
@pytest.fixture
2003+
def warn_copy_on_write() -> bool:
2004+
"""
2005+
Fixture to check if Copy-on-Write is in warning mode.
2006+
"""
2007+
return (
2008+
pd.options.mode.copy_on_write == "warn"
19982009
and _get_option("mode.data_manager", silent=True) == "block"
19992010
)
20002011

pandas/core/config_init.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -476,9 +476,11 @@ def use_inf_as_na_cb(key) -> None:
476476
"copy_on_write",
477477
# Get the default from an environment variable, if set, otherwise defaults
478478
# to False ("1" -> True, "warn" -> "warn"). This variable can be set for testing.
479-
os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
479+
"warn"
480+
if os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "warn"
481+
else os.environ.get("PANDAS_COPY_ON_WRITE", "0") == "1",
480482
copy_on_write_doc,
481-
validator=is_bool,
483+
validator=is_one_of_factory([True, False, "warn"]),
482484
)
483485

484486

pandas/core/generic.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from pandas._config import (
3131
config,
3232
using_copy_on_write,
33+
warn_copy_on_write,
3334
)
3435

3536
from pandas._libs import lib
@@ -4396,7 +4397,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False):
43964397
df.iloc[0:5]['group'] = 'a'
43974398
43984399
"""
4399-
if using_copy_on_write():
4400+
if using_copy_on_write() or warn_copy_on_write():
44004401
return
44014402

44024403
# return early if the check is not needed

pandas/core/internals/managers.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616

1717
import numpy as np
1818

19-
from pandas._config import using_copy_on_write
19+
from pandas._config import (
20+
using_copy_on_write,
21+
warn_copy_on_write,
22+
)
2023

2124
from pandas._libs import (
2225
internals as libinternals,
@@ -1988,9 +1991,17 @@ def setitem_inplace(self, indexer, value) -> None:
19881991
in place, not returning a new Manager (and Block), and thus never changing
19891992
the dtype.
19901993
"""
1991-
if using_copy_on_write() and not self._has_no_reference(0):
1992-
self.blocks = (self._block.copy(),)
1993-
self._cache.clear()
1994+
if not self._has_no_reference(0):
1995+
if using_copy_on_write():
1996+
self.blocks = (self._block.copy(),)
1997+
self._cache.clear()
1998+
elif warn_copy_on_write():
1999+
warnings.warn(
2000+
"Setting value on view: behaviour will change in pandas 3.0 "
2001+
"with Copy-on-Write ...",
2002+
FutureWarning,
2003+
stacklevel=find_stack_level(),
2004+
)
19942005

19952006
super().setitem_inplace(indexer, value)
19962007

pandas/tests/copy_view/test_indexing.py

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,9 @@ def test_subset_row_slice(backend, using_copy_on_write):
139139
@pytest.mark.parametrize(
140140
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
141141
)
142-
def test_subset_column_slice(backend, using_copy_on_write, using_array_manager, dtype):
142+
def test_subset_column_slice(
143+
backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
144+
):
143145
# Case: taking a subset of the columns of a DataFrame using a slice
144146
# + afterwards modifying the subset
145147
dtype_backend, DataFrame, _ = backend
@@ -159,10 +161,14 @@ def test_subset_column_slice(backend, using_copy_on_write, using_array_manager,
159161

160162
subset.iloc[0, 0] = 0
161163
assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b"))
162-
163164
else:
164165
# we only get a warning in case of a single block
165-
warn = SettingWithCopyWarning if single_block else None
166+
# TODO
167+
warn = (
168+
SettingWithCopyWarning
169+
if (single_block and not warn_copy_on_write)
170+
else None
171+
)
166172
with pd.option_context("chained_assignment", "warn"):
167173
with tm.assert_produces_warning(warn):
168174
subset.iloc[0, 0] = 0
@@ -303,7 +309,9 @@ def test_subset_iloc_rows_columns(
303309
[slice(0, 2), np.array([True, True, False]), np.array([0, 1])],
304310
ids=["slice", "mask", "array"],
305311
)
306-
def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on_write):
312+
def test_subset_set_with_row_indexer(
313+
backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write
314+
):
307315
# Case: setting values with a row indexer on a viewing subset
308316
# subset[indexer] = value and subset.iloc[indexer] = value
309317
_, DataFrame, _ = backend
@@ -318,7 +326,8 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on
318326
):
319327
pytest.skip("setitem with labels selects on columns")
320328

321-
if using_copy_on_write:
329+
# TODO
330+
if using_copy_on_write or warn_copy_on_write:
322331
indexer_si(subset)[indexer] = 0
323332
else:
324333
# INFO iloc no longer raises warning since pandas 1.4
@@ -340,7 +349,7 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on
340349
tm.assert_frame_equal(df, df_orig)
341350

342351

343-
def test_subset_set_with_mask(backend, using_copy_on_write):
352+
def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write):
344353
# Case: setting values with a mask on a viewing subset: subset[mask] = value
345354
_, DataFrame, _ = backend
346355
df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]})
@@ -349,7 +358,8 @@ def test_subset_set_with_mask(backend, using_copy_on_write):
349358

350359
mask = subset > 3
351360

352-
if using_copy_on_write:
361+
# TODO
362+
if using_copy_on_write or warn_copy_on_write:
353363
subset[mask] = 0
354364
else:
355365
with pd.option_context("chained_assignment", "warn"):
@@ -370,7 +380,7 @@ def test_subset_set_with_mask(backend, using_copy_on_write):
370380
tm.assert_frame_equal(df, df_orig)
371381

372382

373-
def test_subset_set_column(backend, using_copy_on_write):
383+
def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write):
374384
# Case: setting a single column on a viewing subset -> subset[col] = value
375385
dtype_backend, DataFrame, _ = backend
376386
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
@@ -382,7 +392,8 @@ def test_subset_set_column(backend, using_copy_on_write):
382392
else:
383393
arr = pd.array([10, 11], dtype="Int64")
384394

385-
if using_copy_on_write:
395+
# TODO
396+
if using_copy_on_write or warn_copy_on_write:
386397
subset["a"] = arr
387398
else:
388399
with pd.option_context("chained_assignment", "warn"):
@@ -472,7 +483,7 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write, using_array_m
472483
@pytest.mark.parametrize(
473484
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
474485
)
475-
def test_subset_set_columns(backend, using_copy_on_write, dtype):
486+
def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dtype):
476487
# Case: setting multiple columns on a viewing subset
477488
# -> subset[[col1, col2]] = value
478489
dtype_backend, DataFrame, _ = backend
@@ -482,7 +493,8 @@ def test_subset_set_columns(backend, using_copy_on_write, dtype):
482493
df_orig = df.copy()
483494
subset = df[1:3]
484495

485-
if using_copy_on_write:
496+
# TODO
497+
if using_copy_on_write or warn_copy_on_write:
486498
subset[["a", "c"]] = 0
487499
else:
488500
with pd.option_context("chained_assignment", "warn"):
@@ -879,7 +891,9 @@ def test_del_series(backend):
879891
# Accessing column as Series
880892

881893

882-
def test_column_as_series(backend, using_copy_on_write, using_array_manager):
894+
def test_column_as_series(
895+
backend, using_copy_on_write, warn_copy_on_write, using_array_manager
896+
):
883897
# Case: selecting a single column now also uses Copy-on-Write
884898
dtype_backend, DataFrame, Series = backend
885899
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
@@ -892,10 +906,14 @@ def test_column_as_series(backend, using_copy_on_write, using_array_manager):
892906
if using_copy_on_write or using_array_manager:
893907
s[0] = 0
894908
else:
895-
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
896-
with pd.option_context("chained_assignment", "warn"):
897-
with tm.assert_produces_warning(warn):
909+
if warn_copy_on_write:
910+
with tm.assert_produces_warning(FutureWarning):
898911
s[0] = 0
912+
else:
913+
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None
914+
with pd.option_context("chained_assignment", "warn"):
915+
with tm.assert_produces_warning(warn):
916+
s[0] = 0
899917

900918
expected = Series([0, 2, 3], name="a")
901919
tm.assert_series_equal(s, expected)
@@ -910,7 +928,7 @@ def test_column_as_series(backend, using_copy_on_write, using_array_manager):
910928

911929

912930
def test_column_as_series_set_with_upcast(
913-
backend, using_copy_on_write, using_array_manager
931+
backend, using_copy_on_write, using_array_manager, warn_copy_on_write
914932
):
915933
# Case: selecting a single column now also uses Copy-on-Write -> when
916934
# setting a value causes an upcast, we don't need to update the parent
@@ -921,10 +939,12 @@ def test_column_as_series_set_with_upcast(
921939

922940
s = df["a"]
923941
if dtype_backend == "nullable":
924-
with pytest.raises(TypeError, match="Invalid value"):
925-
s[0] = "foo"
942+
warn = FutureWarning if warn_copy_on_write else None
943+
with tm.assert_produces_warning(warn):
944+
with pytest.raises(TypeError, match="Invalid value"):
945+
s[0] = "foo"
926946
expected = Series([1, 2, 3], name="a")
927-
elif using_copy_on_write or using_array_manager:
947+
elif using_copy_on_write or warn_copy_on_write or using_array_manager:
928948
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
929949
s[0] = "foo"
930950
expected = Series(["foo", 2, 3], dtype=object, name="a")
@@ -962,7 +982,12 @@ def test_column_as_series_set_with_upcast(
962982
ids=["getitem", "loc", "iloc"],
963983
)
964984
def test_column_as_series_no_item_cache(
965-
request, backend, method, using_copy_on_write, using_array_manager
985+
request,
986+
backend,
987+
method,
988+
using_copy_on_write,
989+
warn_copy_on_write,
990+
using_array_manager,
966991
):
967992
# Case: selecting a single column (which now also uses Copy-on-Write to protect
968993
# the view) should always give a new object (i.e. not make use of a cache)
@@ -979,7 +1004,8 @@ def test_column_as_series_no_item_cache(
9791004
else:
9801005
assert s1 is s2
9811006

982-
if using_copy_on_write or using_array_manager:
1007+
# TODO
1008+
if using_copy_on_write or warn_copy_on_write or using_array_manager:
9831009
s1.iloc[0] = 0
9841010
else:
9851011
warn = SettingWithCopyWarning if dtype_backend == "numpy" else None

pandas/tests/copy_view/test_methods.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,7 +1651,7 @@ def test_isetitem_frame(using_copy_on_write):
16511651

16521652

16531653
@pytest.mark.parametrize("key", ["a", ["a"]])
1654-
def test_get(using_copy_on_write, key):
1654+
def test_get(using_copy_on_write, warn_copy_on_write, key):
16551655
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
16561656
df_orig = df.copy()
16571657

@@ -1665,7 +1665,12 @@ def test_get(using_copy_on_write, key):
16651665
else:
16661666
# for non-CoW it depends on whether we got a Series or DataFrame if it
16671667
# is a view or copy or triggers a warning or not
1668-
warn = SettingWithCopyWarning if isinstance(key, list) else None
1668+
# TODO(CoW) should warn
1669+
warn = (
1670+
(None if warn_copy_on_write else SettingWithCopyWarning)
1671+
if isinstance(key, list)
1672+
else None
1673+
)
16691674
with pd.option_context("chained_assignment", "warn"):
16701675
with tm.assert_produces_warning(warn):
16711676
result.iloc[0] = 0
@@ -1680,7 +1685,9 @@ def test_get(using_copy_on_write, key):
16801685
@pytest.mark.parametrize(
16811686
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
16821687
)
1683-
def test_xs(using_copy_on_write, using_array_manager, axis, key, dtype):
1688+
def test_xs(
1689+
using_copy_on_write, warn_copy_on_write, using_array_manager, axis, key, dtype
1690+
):
16841691
single_block = (dtype == "int64") and not using_array_manager
16851692
is_view = single_block or (using_array_manager and axis == 1)
16861693
df = DataFrame(
@@ -1695,8 +1702,13 @@ def test_xs(using_copy_on_write, using_array_manager, axis, key, dtype):
16951702
elif using_copy_on_write:
16961703
assert result._mgr._has_no_reference(0)
16971704

1705+
# TODO(CoW) should warn in case of is_view
16981706
if using_copy_on_write or is_view:
16991707
result.iloc[0] = 0
1708+
elif warn_copy_on_write:
1709+
warn = FutureWarning if single_block else None
1710+
with tm.assert_produces_warning(warn):
1711+
result.iloc[0] = 0
17001712
else:
17011713
with pd.option_context("chained_assignment", "warn"):
17021714
with tm.assert_produces_warning(SettingWithCopyWarning):
@@ -1710,7 +1722,9 @@ def test_xs(using_copy_on_write, using_array_manager, axis, key, dtype):
17101722

17111723
@pytest.mark.parametrize("axis", [0, 1])
17121724
@pytest.mark.parametrize("key, level", [("l1", 0), (2, 1)])
1713-
def test_xs_multiindex(using_copy_on_write, using_array_manager, key, level, axis):
1725+
def test_xs_multiindex(
1726+
using_copy_on_write, warn_copy_on_write, using_array_manager, key, level, axis
1727+
):
17141728
arr = np.arange(18).reshape(6, 3)
17151729
index = MultiIndex.from_product([["l1", "l2"], [1, 2, 3]], names=["lev1", "lev2"])
17161730
df = DataFrame(arr, index=index, columns=list("abc"))
@@ -1725,8 +1739,9 @@ def test_xs_multiindex(using_copy_on_write, using_array_manager, key, level, axi
17251739
get_array(df, df.columns[0]), get_array(result, result.columns[0])
17261740
)
17271741

1742+
# TODO(CoW) should warn
17281743
warn = (
1729-
SettingWithCopyWarning
1744+
(None if warn_copy_on_write else SettingWithCopyWarning)
17301745
if not using_copy_on_write and not using_array_manager
17311746
else None
17321747
)

pandas/tests/extension/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,6 @@ def using_copy_on_write() -> bool:
215215
Fixture to check if Copy-on-Write is enabled.
216216
"""
217217
return (
218-
options.mode.copy_on_write
218+
options.mode.copy_on_write is True
219219
and _get_option("mode.data_manager", silent=True) == "block"
220220
)

0 commit comments

Comments
 (0)