From 8bda83d921e44492d249e6be3385e1ff519f9902 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 13 Feb 2024 12:00:01 -0800 Subject: [PATCH 1/3] Remove assert_cow_warning --- pandas/_testing/__init__.py | 2 -- pandas/_testing/contexts.py | 26 -------------------------- 2 files changed, 28 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index b4cd6d82bb57f..5409c9b1fc0b9 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -72,7 +72,6 @@ get_obj, ) from pandas._testing.contexts import ( - assert_cow_warning, decompress_file, ensure_clean, raises_chained_assignment_error, @@ -583,7 +582,6 @@ def shares_memory(left, right) -> bool: "assert_series_equal", "assert_sp_array_equal", "assert_timedelta_array_equal", - "assert_cow_warning", "at", "BOOL_DTYPES", "box_expected", diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index 3570ebaeffed5..a04322cf97798 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -228,29 +228,3 @@ def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=() warning, match="|".join((match, *extra_match)), ) - - -def assert_cow_warning(warn=True, match=None, **kwargs): - """ - Assert that a warning is raised in the CoW warning mode. - - Parameters - ---------- - warn : bool, default True - By default, check that a warning is raised. Can be turned off by passing False. - match : str - The warning message to match against, if different from the default. - kwargs - Passed through to assert_produces_warning - """ - from pandas._testing import assert_produces_warning - - if not warn: - from contextlib import nullcontext - - return nullcontext() - - if not match: - match = "Setting a value on a view" - - return assert_produces_warning(FutureWarning, match=match, **kwargs) From 952e96ff8766752f1b1d0ced3864bfa3b71a7366 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 13 Feb 2024 12:47:49 -0800 Subject: [PATCH 2/3] Remove misc COW stuff --- pandas/core/arrays/arrow/array.py | 6 ++-- pandas/core/internals/blocks.py | 23 -------------- pandas/errors/cow.py | 30 ------------------- .../test_chained_assignment_deprecation.py | 1 - .../copy_view/test_core_functionalities.py | 2 +- pandas/tests/extension/json/test_json.py | 2 +- pandas/tests/frame/indexing/test_indexing.py | 1 - .../tests/indexing/multiindex/test_setitem.py | 2 -- .../indexing/test_chaining_and_caching.py | 1 - scripts/validate_unwanted_patterns.py | 3 -- 10 files changed, 4 insertions(+), 67 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 435d3e4751f6f..b74bba038f932 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1050,8 +1050,7 @@ def _pad_or_backfill( copy: bool = True, ) -> Self: if not self._hasna: - # TODO(CoW): Not necessary anymore when CoW is the default - return self.copy() + return self if limit is None and limit_area is None: method = missing.clean_fill_method(method) @@ -1084,8 +1083,7 @@ def fillna( value, method = validate_fillna_kwargs(value, method) if not self._hasna: - # TODO(CoW): Not necessary anymore when CoW is the default - return self.copy() + return self if limit is not None: return super().fillna(value=value, method=method, limit=limit, copy=copy) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2e96366b2d17b..80c8a1e8ef5c7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -131,29 +131,6 @@ _dtype_obj = np.dtype("object") -COW_WARNING_GENERAL_MSG = """\ -Setting a value on a view: behaviour will change in pandas 3.0. -You are mutating a Series or DataFrame object, and currently this mutation will -also have effect on other Series or DataFrame objects that share data with this -object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object -will never modify another. -""" - - -COW_WARNING_SETITEM_MSG = """\ -Setting a value on a view: behaviour will change in pandas 3.0. -Currently, the mutation will also have effect on the object that shares data -with this object. For example, when setting a value in a Series that was -extracted from a column of a DataFrame, that DataFrame will also be updated: - - ser = df["col"] - ser[0] = 0 <--- in pandas 2, this also updates `df` - -In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never -modify another, and thus in the example above, `df` will not be changed. -""" - - def maybe_split(meth: F) -> F: """ If we have a multi-column block, split and operate block-wise. Otherwise diff --git a/pandas/errors/cow.py b/pandas/errors/cow.py index 9a3f6f4cc8efc..1e7829c88ae7e 100644 --- a/pandas/errors/cow.py +++ b/pandas/errors/cow.py @@ -22,33 +22,3 @@ "using 'df.method({col: value}, inplace=True)' instead, to perform " "the operation inplace on the original object.\n\n" ) - - -_chained_assignment_warning_msg = ( - "ChainedAssignmentError: behaviour will change in pandas 3.0!\n" - "You are setting values through chained assignment. Currently this works " - "in certain cases, but when using Copy-on-Write (which will become the " - "default behaviour in pandas 3.0) this will never work to update the " - "original DataFrame or Series, because the intermediate object on which " - "we are setting values will behave as a copy.\n" - "A typical example is when you are setting values in a column of a " - "DataFrame, like:\n\n" - 'df["col"][row_indexer] = value\n\n' - 'Use `df.loc[row_indexer, "col"] = values` instead, to perform the ' - "assignment in a single step and ensure this keeps updating the original `df`.\n\n" - "See the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy\n" -) - -_chained_assignment_warning_method_msg = ( - "A value is trying to be set on a copy of a DataFrame or Series " - "through chained assignment using an inplace method.\n" - "The behavior will change in pandas 3.0. This inplace method will " - "never work because the intermediate object on which we are setting " - "values always behaves as a copy.\n\n" - "For example, when doing 'df[col].method(value, inplace=True)', try " - "using 'df.method({col: value}, inplace=True)' or " - "df[col] = df[col].method(value) instead, to perform " - "the operation inplace on the original object.\n\n" -) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 76e3df4afb52c..4aef69a6fde98 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -7,7 +7,6 @@ import pandas._testing as tm -# TODO(CoW-warn) expand the cases @pytest.mark.parametrize( "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] ) diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index 70d7112ddbd89..ad16bafdf0ee4 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -46,7 +46,7 @@ def test_setitem_with_view_invalidated_does_not_copy(request): df["b"] = 100 arr = get_array(df, "a") view = None # noqa: F841 - # TODO(CoW-warn) false positive? -> block gets split because of `df["b"] = 100` + # TODO(CoW) block gets split because of `df["b"] = 100` # which introduces additional refs, even when those of `view` go out of scopes df.iloc[0, 0] = 100 # Setitem split the block. Since the old block shared data with view diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 1e4897368cc7a..64f46f218d2b3 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -218,7 +218,7 @@ def test_fillna_copy_frame(self, data_missing): super().test_fillna_copy_frame(data_missing) @pytest.mark.xfail(reason="Fails with CoW") - def test_equals_same_data_different_object(self, data, request): + def test_equals_same_data_different_object(self, data): super().test_equals_same_data_different_object(data) @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)") diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 97176b20376ff..59bdd43d48ba0 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -321,7 +321,6 @@ def test_setitem(self, float_frame, using_infer_string): # so raise/warn smaller = float_frame[:2] - # With CoW, adding a new column doesn't raise a warning smaller["col10"] = ["1", "2"] if using_infer_string: diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 1b6e1341a9c40..abf89c2b0d096 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -207,8 +207,6 @@ def test_multiindex_assignment_single_dtype(self): ) # arr can be losslessly cast to int, so this setitem is inplace - # INFO(CoW-warn) this does not warn because we directly took .values - # above, so no reference to a pandas object is alive for `view` df.loc[4, "c"] = arr exp = Series(arr, index=[8, 10], name="c", dtype="int64") result = df.loc[4, "c"] diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 718ea69960775..c9e0aa2ffd77c 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -324,7 +324,6 @@ def test_detect_chained_assignment_warning_stacklevel(self, rhs): df = DataFrame(np.arange(25).reshape(5, 5)) df_original = df.copy() chained = df.loc[:3] - # INFO(CoW) no warning, and original dataframe not changed chained[2] = rhs tm.assert_frame_equal(df, df_original) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index ce95f5a76e0cc..fd8963ad85abc 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -49,9 +49,6 @@ "_global_config", "_chained_assignment_msg", "_chained_assignment_method_msg", - "_chained_assignment_warning_msg", - "_chained_assignment_warning_method_msg", - "_check_cacher", "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", From 41323d5fb676f1dafab1aeabcb3bac34dc95174b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 13 Feb 2024 13:39:45 -0800 Subject: [PATCH 3/3] Undo todo --- pandas/core/arrays/arrow/array.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b74bba038f932..435d3e4751f6f 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1050,7 +1050,8 @@ def _pad_or_backfill( copy: bool = True, ) -> Self: if not self._hasna: - return self + # TODO(CoW): Not necessary anymore when CoW is the default + return self.copy() if limit is None and limit_area is None: method = missing.clean_fill_method(method) @@ -1083,7 +1084,8 @@ def fillna( value, method = validate_fillna_kwargs(value, method) if not self._hasna: - return self + # TODO(CoW): Not necessary anymore when CoW is the default + return self.copy() if limit is not None: return super().fillna(value=value, method=method, limit=limit, copy=copy)