From 4f77590825a6671dcd9fe8277da56835e81bc64e Mon Sep 17 00:00:00 2001 From: Chilin Date: Sun, 16 Feb 2025 03:37:52 +0800 Subject: [PATCH 1/6] BUG: fix assign failure issue when Copy-on-Write --- pandas/core/internals/managers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a3738bb25f56c..92dfd64c3a86f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,7 +572,13 @@ def setitem(self, indexer, value) -> Self: 0, blk_loc, values ) # first block equals values - self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value) + is_full_column_selection = indexer[1] == slice(None) + col_indexer = ( + slice(None) + if is_full_column_selection + else np.arange(len(blk_loc)) + ) + self.blocks[0].setitem((indexer[0], col_indexer), value) return self # No need to split if we either set all columns or on a single block # manager From 6921ebbc8d64df8e3f69efbaabb473f36759eeb9 Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 16 Feb 2025 22:40:16 +0800 Subject: [PATCH 2/6] Check if data type is slice before comparing with slice(None) --- pandas/core/internals/managers.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 92dfd64c3a86f..289667c309d45 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,12 +572,10 @@ def setitem(self, indexer, value) -> Self: 0, blk_loc, values ) # first block equals values - is_full_column_selection = indexer[1] == slice(None) - col_indexer = ( - slice(None) - if is_full_column_selection - else np.arange(len(blk_loc)) - ) + if isinstance(indexer[1], slice) and indexer[1] == slice(None): + col_indexer = slice(None) + else: + col_indexer = np.arange(len(blk_loc)) self.blocks[0].setitem((indexer[0], col_indexer), value) return self # No need to split if 
we either set all columns or on a single block From 69da0209dd9e7eb6f161ad9e42bc231dd4611ac1 Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 16 Feb 2025 22:41:19 +0800 Subject: [PATCH 3/6] Add new testcase for assignment fails with enable Copy-on-Write --- pandas/tests/indexing/test_iloc.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 2f6998a85c80b..ef9f64e28ffa7 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -22,6 +22,7 @@ date_range, interval_range, isna, + option_context, to_datetime, ) import pandas._testing as tm @@ -1448,3 +1449,26 @@ def test_iloc_nullable_int64_size_1_nan(self): result = DataFrame({"a": ["test"], "b": [np.nan]}) with pytest.raises(TypeError, match="Invalid value"): result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") + + def test_iloc_setitem_list_with_cow(self): + # GH#60309 + with option_context("mode.copy_on_write", True): + dftest = DataFrame( + {"A": [1, 4, 1, 5], "B": [2, 5, 2, 6], "C": [3, 6, 1, 7]} + ) + df = dftest[["B", "C"]] + + # Perform the iloc operation + df.iloc[[1, 3], :] = [[2, 2], [2, 2]] + + # Check that original DataFrame is unchanged + expected_orig = DataFrame( + {"A": [1, 4, 1, 5], "B": [2, 5, 2, 6], "C": [3, 6, 1, 7]} + ) + tm.assert_frame_equal(dftest, expected_orig) + + # Check that view is modified correctly + expected_view = DataFrame( + {"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index + ) + tm.assert_frame_equal(df, expected_view) From 41926d4ac2b4a44413aacb91ac380b322d16c96d Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 16 Feb 2025 22:44:09 +0800 Subject: [PATCH 4/6] Add new entry for whatsnew/v2.3.0.rst --- doc/source/whatsnew/v2.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 8bdddb5b7f85d..4c180fa33ccf8 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ 
b/doc/source/whatsnew/v2.3.0.rst @@ -37,6 +37,7 @@ Other enhancements updated to work correctly with NumPy >= 2 (:issue:`57739`) - :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`) - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype`` (:issue:`60663`) +- The :meth:`DataFrame.iloc` indexer now assigns values correctly with a list-like row indexer when the ``mode.copy_on_write`` option is enabled (:issue:`60309`) - The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns when backed by PyArrow (:issue:`60633`) - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`) From 1f9215cdea1f77dbf5b89f33fb117b0b77bfe5f1 Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 16 Feb 2025 23:52:46 +0800 Subject: [PATCH 5/6] Add type hint --- pandas/core/internals/managers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 289667c309d45..2e6701916a8d4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -572,6 +572,7 @@ def setitem(self, indexer, value) -> Self: 0, blk_loc, values ) # first block equals values + col_indexer: slice | np.ndarray if isinstance(indexer[1], slice) and indexer[1] == slice(None): col_indexer = slice(None) else: From fcffbf22d51968ec2b63f8c13ae98003db13212c Mon Sep 17 00:00:00 2001 From: Chilin Chiou Date: Sun, 2 Mar 2025 16:41:30 +0800 Subject: [PATCH 6/6] Rename view to df in test_iloc.py --- pandas/tests/indexing/test_iloc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index ef9f64e28ffa7..d38169c76d584 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1467,8 +1467,8 @@ def test_iloc_setitem_list_with_cow(self): ) tm.assert_frame_equal(dftest, 
expected_orig) - # Check that view is modified correctly - expected_view = DataFrame( + # Check that df is modified correctly + expected_df = DataFrame( {"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index ) - tm.assert_frame_equal(df, expected_view) + tm.assert_frame_equal(df, expected_df)