Skip to content

Commit be3e9fa

Browse files
API: detect and raise error for chained assignment under Copy-on-Write
1 parent 83f53d6 commit be3e9fa

File tree

10 files changed

+182
-50
lines changed

10 files changed

+182
-50
lines changed

pandas/core/frame.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import functools
1717
from io import StringIO
1818
import itertools
19+
import sys
1920
from textwrap import dedent
2021
from typing import (
2122
TYPE_CHECKING,
@@ -91,7 +92,10 @@
9192
function as nv,
9293
np_percentile_argname,
9394
)
94-
from pandas.errors import InvalidIndexError
95+
from pandas.errors import (
96+
ChainedAssignmentError,
97+
InvalidIndexError,
98+
)
9599
from pandas.util._decorators import (
96100
Appender,
97101
Substitution,
@@ -3911,6 +3915,13 @@ def isetitem(self, loc, value) -> None:
39113915
self._iset_item_mgr(loc, arraylike, inplace=False)
39123916

39133917
def __setitem__(self, key, value):
3918+
if (
3919+
get_option("mode.copy_on_write")
3920+
and get_option("mode.data_manager") == "block"
3921+
):
3922+
if sys.getrefcount(self) <= 3:
3923+
raise ChainedAssignmentError("Chained assignment doesn't work!!")
3924+
39143925
key = com.apply_if_callable(key, self)
39153926

39163927
# see if we can slice the rows

pandas/core/indexing.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from contextlib import suppress
4+
import sys
45
from typing import (
56
TYPE_CHECKING,
67
Hashable,
@@ -13,6 +14,8 @@
1314

1415
import numpy as np
1516

17+
from pandas._config import get_option
18+
1619
from pandas._libs.indexing import NDFrameIndexerBase
1720
from pandas._libs.lib import item_from_zerodim
1821
from pandas._typing import (
@@ -21,6 +24,7 @@
2124
)
2225
from pandas.errors import (
2326
AbstractMethodError,
27+
ChainedAssignmentError,
2428
IndexingError,
2529
InvalidIndexError,
2630
)
@@ -813,6 +817,14 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
813817

814818
@final
815819
def __setitem__(self, key, value) -> None:
820+
if (
821+
get_option("mode.copy_on_write")
822+
and get_option("mode.data_manager") == "block"
823+
):
824+
print("_LocationIndexer.__setitem__ refcount: ", sys.getrefcount(self.obj))
825+
if sys.getrefcount(self.obj) <= 2:
826+
raise ChainedAssignmentError("Chained assignment doesn't work!!")
827+
816828
check_deprecated_indexers(key)
817829
if isinstance(key, tuple):
818830
key = tuple(list(x) if is_iterator(x) else x for x in key)

pandas/core/series.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44
from __future__ import annotations
55

6+
import sys
67
from textwrap import dedent
78
from typing import (
89
IO,
@@ -63,7 +64,10 @@
6364
npt,
6465
)
6566
from pandas.compat.numpy import function as nv
66-
from pandas.errors import InvalidIndexError
67+
from pandas.errors import (
68+
ChainedAssignmentError,
69+
InvalidIndexError,
70+
)
6771
from pandas.util._decorators import (
6872
Appender,
6973
Substitution,
@@ -1058,6 +1062,14 @@ def _get_value(self, label, takeable: bool = False):
10581062
return self.iloc[loc]
10591063

10601064
def __setitem__(self, key, value) -> None:
1065+
if (
1066+
get_option("mode.copy_on_write")
1067+
and get_option("mode.data_manager") == "block"
1068+
):
1069+
print("Series.__getitem__ refcount: ", sys.getrefcount(self))
1070+
if sys.getrefcount(self) <= 3:
1071+
raise ChainedAssignmentError("Chained assignment doesn't work!!")
1072+
10611073
check_deprecated_indexers(key)
10621074
key = com.apply_if_callable(key, self)
10631075
cacher_needs_updating = self._check_is_chained_assignment_possible()

pandas/errors/__init__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,28 @@ class SettingWithCopyError(ValueError):
298298
"""
299299

300300

301+
class ChainedAssignmentError(ValueError):
302+
"""
303+
Exception raised when trying to set on a copied slice from a ``DataFrame``.
304+
305+
The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can
306+
happen unintentionally when chained indexing.
307+
308+
For more information on eveluation order,
309+
see :ref:`the user guide<indexing.evaluation_order>`.
310+
311+
For more information on view vs. copy,
312+
see :ref:`the user guide<indexing.view_versus_copy>`.
313+
314+
Examples
315+
--------
316+
>>> pd.options.mode.chained_assignment = 'raise'
317+
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
318+
>>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP
319+
... # SettingWithCopyError: A value is trying to be set on a copy of a...
320+
"""
321+
322+
301323
class SettingWithCopyWarning(Warning):
302324
"""
303325
Warning raised when trying to set on a copied slice from a ``DataFrame``.

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.errors import ChainedAssignmentError
67
import pandas.util._test_decorators as td
78

89
from pandas.core.dtypes.base import _registry as ea_registry
@@ -1127,12 +1128,17 @@ def test_loc_setitem_all_false_boolean_two_blocks(self):
11271128

11281129

11291130
class TestDataFrameSetitemCopyViewSemantics:
1130-
def test_setitem_always_copy(self, float_frame):
1131+
def test_setitem_always_copy(self, float_frame, using_copy_on_write):
11311132
assert "E" not in float_frame.columns
11321133
s = float_frame["A"].copy()
11331134
float_frame["E"] = s
11341135

1135-
float_frame["E"][5:10] = np.nan
1136+
if using_copy_on_write:
1137+
with pytest.raises(ChainedAssignmentError):
1138+
float_frame["E"][5:10] = np.nan
1139+
float_frame.loc[5:10, "E"] = np.nan
1140+
else:
1141+
float_frame["E"][5:10] = np.nan
11361142
assert notna(s[5:10]).all()
11371143

11381144
@pytest.mark.parametrize("consolidate", [True, False])
@@ -1246,12 +1252,15 @@ def test_setitem_column_update_inplace(self, using_copy_on_write):
12461252
df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels)
12471253
values = df._mgr.blocks[0].values
12481254

1249-
for label in df.columns:
1250-
df[label][label] = 1
1251-
12521255
if not using_copy_on_write:
1256+
for label in df.columns:
1257+
df[label][label] = 1
1258+
12531259
# diagonal values all updated
12541260
assert np.all(values[np.arange(10), np.arange(10)] == 1)
12551261
else:
1262+
with pytest.raises(ChainedAssignmentError):
1263+
for label in df.columns:
1264+
df[label][label] = 1
12561265
# original dataframe not updated
12571266
assert np.all(values[np.arange(10), np.arange(10)] == 0)

pandas/tests/frame/indexing/test_xs.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
import numpy as np
44
import pytest
55

6-
from pandas.errors import SettingWithCopyError
6+
from pandas.errors import (
7+
ChainedAssignmentError,
8+
SettingWithCopyError,
9+
)
710

811
from pandas import (
912
DataFrame,
@@ -125,7 +128,8 @@ def test_xs_view(self, using_array_manager, using_copy_on_write):
125128
df_orig = dm.copy()
126129

127130
if using_copy_on_write:
128-
dm.xs(2)[:] = 20
131+
with pytest.raises(ChainedAssignmentError):
132+
dm.xs(2)[:] = 20
129133
tm.assert_frame_equal(dm, df_orig)
130134
elif using_array_manager:
131135
# INFO(ArrayManager) with ArrayManager getting a row as a view is

pandas/tests/indexing/multiindex/test_chaining_and_caching.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.errors import SettingWithCopyError
4+
from pandas.errors import (
5+
ChainedAssignmentError,
6+
SettingWithCopyError,
7+
)
58
import pandas.util._test_decorators as td
69

710
from pandas import (
@@ -50,11 +53,13 @@ def test_cache_updating(using_copy_on_write):
5053

5154
# setting via chained assignment
5255
# but actually works, since everything is a view
53-
df.loc[0]["z"].iloc[0] = 1.0
54-
result = df.loc[(0, 0), "z"]
5556
if using_copy_on_write:
56-
assert result == df_original.loc[0, "z"]
57+
with pytest.raises(ChainedAssignmentError):
58+
df.loc[0]["z"].iloc[0] = 1.0
59+
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
5760
else:
61+
df.loc[0]["z"].iloc[0] = 1.0
62+
result = df.loc[(0, 0), "z"]
5863
assert result == 1
5964

6065
# correct setting

pandas/tests/indexing/multiindex/test_partial.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import ChainedAssignmentError
45
import pandas.util._test_decorators as td
56

67
from pandas import (
@@ -133,20 +134,26 @@ def test_partial_set(
133134
exp.iloc[65:85] = 0
134135
tm.assert_frame_equal(df, exp)
135136

136-
df["A"].loc[2000, 4] = 1
137-
if not using_copy_on_write:
138-
exp["A"].loc[2000, 4].values[:] = 1
137+
if using_copy_on_write:
138+
with pytest.raises(ChainedAssignmentError):
139+
df["A"].loc[2000, 4] = 1
140+
df.loc[(2000, 4), "A"] = 1
141+
else:
142+
df["A"].loc[2000, 4] = 1
143+
exp.iloc[65:85, 0] = 1
139144
tm.assert_frame_equal(df, exp)
140145

141146
df.loc[2000] = 5
142147
exp.iloc[:100] = 5
143148
tm.assert_frame_equal(df, exp)
144149

145150
# this works...for now
146-
df["A"].iloc[14] = 5
147151
if using_copy_on_write:
152+
with pytest.raises(ChainedAssignmentError):
153+
df["A"].iloc[14] = 5
148154
df["A"].iloc[14] == exp["A"].iloc[14]
149155
else:
156+
df["A"].iloc[14] = 5
150157
assert df["A"].iloc[14] == 5
151158

152159
@pytest.mark.parametrize("dtype", [int, float])

pandas/tests/indexing/multiindex/test_setitem.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.errors import SettingWithCopyError
4+
from pandas.errors import (
5+
ChainedAssignmentError,
6+
SettingWithCopyError,
7+
)
58
import pandas.util._test_decorators as td
69

710
import pandas as pd
@@ -501,8 +504,8 @@ def test_frame_setitem_copy_raises(
501504
# will raise/warn as its chained assignment
502505
df = multiindex_dataframe_random_data.T
503506
if using_copy_on_write:
504-
# TODO(CoW) it would be nice if this could still warn/raise
505-
df["foo"]["one"] = 2
507+
with pytest.raises(ChainedAssignmentError):
508+
df["foo"]["one"] = 2
506509
else:
507510
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
508511
with pytest.raises(SettingWithCopyError, match=msg):
@@ -516,7 +519,8 @@ def test_frame_setitem_copy_no_write(
516519
expected = frame
517520
df = frame.copy()
518521
if using_copy_on_write:
519-
df["foo"]["one"] = 2
522+
with pytest.raises(ChainedAssignmentError):
523+
df["foo"]["one"] = 2
520524
else:
521525
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
522526
with pytest.raises(SettingWithCopyError, match=msg):

0 commit comments

Comments
 (0)