diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index 92577c48b865b..8fe7debee4713 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -27,6 +27,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in the Copy-on-Write implementation losing track of views when indexing a :class:`DataFrame` with another :class:`DataFrame` (:issue:`50630`) - Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`) - Bug in :meth:`Series.quantile` emitting warning from NumPy when :class:`Series` has only ``NA`` values (:issue:`50681`) - Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ac0d614dfea89..98079d0c5ab4d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -384,10 +384,8 @@ def setitem(self: T, indexer, value) -> T: return self.apply("setitem", indexer=indexer, value=value) def putmask(self, mask, new, align: bool = True): - if ( - using_copy_on_write() - and self.refs is not None - and not all(ref is None for ref in self.refs) + if using_copy_on_write() and any( + not self._has_no_reference_block(i) for i in range(len(self.blocks)) ): # some reference -> copy full dataframe # TODO(CoW) this could be optimized to only copy the blocks that would diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 32b28e584e835..75f6572f40e71 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -796,6 +796,21 @@ def test_squeeze(using_copy_on_write): assert df.loc[0, "a"] == 0 +def test_putmask(using_copy_on_write): + df = DataFrame({"a": [1, 2], "b": 1, "c": 2}) + view = df[:] + df_orig = df.copy() + df[df == df] = 5 + + if using_copy_on_write: + assert not 
np.shares_memory(get_array(view, "a"), get_array(df, "a")) + tm.assert_frame_equal(view, df_orig) + else: + # Without CoW the view still shares memory with df, so it is modified too + assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) + assert view.iloc[0, 0] == 5 + + def test_isetitem(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy()