From 89185cd013358c5f3ed0fa716f5da8ed94563454 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:19:04 +0100 Subject: [PATCH 1/3] Create baseline commit for warning message --- pandas/errors/__init__.py | 13 +++++++++++++ scripts/validate_unwanted_patterns.py | 1 + 2 files changed, 14 insertions(+) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 09a612eca0529..e2aa9010dc109 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -503,6 +503,19 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_warning_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment using an inplace method.\n" + "The behavior will change in pandas 3.0. This inplace method will " + "never work because the intermediate object on which we are setting " + "values always behaves as a copy.\n\n" + "For example, when doing 'df[col].method(value, inplace=True)', try " + "using 'df.method({col: value}, inplace=True)' or " + "df[col] = df[col].method(value) instead, to perform " + "the operation inplace on the original object.\n\n" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 6e6251425928d..5bde4c21cfab5 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -50,6 +50,7 @@ "_global_config", "_chained_assignment_msg", "_chained_assignment_method_msg", + "_chained_assignment_warning_method_msg", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", From 9b3d3e8e7117d9c3cae1fc1290ebbf0f9234390d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:46:52 +0100 Subject: [PATCH 2/3] CoW: Add warning for update --- pandas/core/frame.py | 8 ++++++++ pandas/core/series.py | 12 ++++++++++++ pandas/tests/copy_view/test_methods.py | 18 +++++++++++++++--- pandas/tests/series/indexing/test_indexing.py | 3 ++- pandas/tests/series/methods/test_update.py | 3 ++- 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 58d2b56e764f6..9f16340d98e22 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -62,6 +62,7 @@ InvalidIndexError, _chained_assignment_method_msg, _chained_assignment_msg, + _chained_assignment_warning_method_msg, ) from pandas.util._decorators import ( Appender, @@ -8849,6 +8850,13 @@ def update( ChainedAssignmentError, stacklevel=2, ) + elif not PYPY and not using_copy_on_write(): + if sys.getrefcount(self) <= REF_COUNT: + warnings.warn( + _chained_assignment_warning_method_msg, + FutureWarning, + stacklevel=2, + ) from pandas.core.computation import expressions diff --git a/pandas/core/series.py b/pandas/core/series.py index 1bbd10429ea22..8de0082941e35 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -44,6 +44,7 @@ InvalidIndexError, _chained_assignment_method_msg, _chained_assignment_msg, + _chained_assignment_warning_method_msg, ) from pandas.util._decorators import ( Appender, @@ -3560,6 +3561,17 @@ def update(self, other: Series | Sequence | Mapping) -> None: ChainedAssignmentError, stacklevel=2, ) + elif not PYPY and not using_copy_on_write(): + ctr = sys.getrefcount(self) + ref_count = REF_COUNT + if hasattr(self, "_cacher"): + ref_count += 1 + if ctr <= ref_count: + warnings.warn( + _chained_assignment_warning_method_msg, + FutureWarning, + stacklevel=2, + ) if not isinstance(other, Series): other = Series(other) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 61569a49863cb..6416dd50daddc 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -12,6 +12,7 @@ Series, Timestamp, date_range, + option_context, period_range, ) import pandas._testing as tm @@ -1684,7 +1685,7 @@ def test_get(using_copy_on_write, warn_copy_on_write, key): warn = FutureWarning if isinstance(key, str) else None else: warn = SettingWithCopyWarning if isinstance(key, list) else None - with pd.option_context("chained_assignment", "warn"): + with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(warn): result.iloc[0] = 0 @@ -1721,7 +1722,7 @@ def test_xs( with tm.assert_cow_warning(single_block or axis == 1): result.iloc[0] = 0 else: - with pd.option_context("chained_assignment", "warn"): + with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(SettingWithCopyWarning): result.iloc[0] = 0 @@ -1756,7 +1757,7 @@ def test_xs_multiindex( warn = SettingWithCopyWarning else: warn = None - with pd.option_context("chained_assignment", "warn"): + with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(warn): result.iloc[0, 0] = 0 @@ -1813,6 +1814,17 @@ def test_update_chained_assignment(using_copy_on_write): with tm.raises_chained_assignment_error(): df[["a"]].update(ser2.to_frame()) tm.assert_frame_equal(df, df_orig) + else: + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + df["a"].update(ser2) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + df[["a"]].update(ser2.to_frame()) + + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + with option_context("mode.chained_assignment", None): + df[df["a"] > 1].update(ser2.to_frame()) def test_inplace_arithmetic_series(using_copy_on_write): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 0e850c2d20e72..b108ec24732ac 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -295,7 +295,8 @@ def test_underlying_data_conversion(using_copy_on_write): df["val"].update(s) expected = df_original else: - df["val"].update(s) + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + df["val"].update(s) expected = DataFrame( {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]} ) diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py index c38b2400f0f4e..46b9aa400ee9a 100644 --- a/pandas/tests/series/methods/test_update.py +++ b/pandas/tests/series/methods/test_update.py @@ -34,7 +34,8 @@ def test_update(self, using_copy_on_write): df["c"].update(Series(["foo"], index=[0])) expected = df_orig else: - df["c"].update(Series(["foo"], index=[0])) + with tm.assert_produces_warning(FutureWarning, match="inplace method"): + df["c"].update(Series(["foo"], index=[0])) expected = DataFrame( [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"] ) From a398529f1490cbd05b249239484e76129491914a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 21 Nov 2023 23:31:44 +0100 Subject: [PATCH 3/3] Update series.py --- pandas/core/series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8de0082941e35..a9679f22f9933 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3565,6 +3565,7 @@ def update(self, other: Series | Sequence | Mapping) -> None: ctr = sys.getrefcount(self) ref_count = REF_COUNT if hasattr(self, "_cacher"): + # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 ref_count += 1 if ctr <= ref_count: warnings.warn(