From 1b7dc81235db94bbedab276296731954e1a742a2 Mon Sep 17 00:00:00 2001 From: anzor Date: Tue, 15 Oct 2024 22:59:39 +0300 Subject: [PATCH 1/8] Fix issue #59333 --- pandas/core/indexing.py | 6 +++--- pandas/tests/indexing/test_loc.py | 33 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 08bd3cde60806..675e5145c5856 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2353,7 +2353,6 @@ def ravel(i): return i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) - aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 @@ -2371,7 +2370,9 @@ def ravel(i): # we have a frame, with multiple indexers on both axes; and a # series, so need to broadcast (see GH5206) - if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer): + if len(indexer) == self.ndim and all( + is_sequence(_) or isinstance(_, slice) for _ in indexer + ): ser_values = ser.reindex(obj.axes[0][indexer[0]])._values # single indexer @@ -2399,7 +2400,6 @@ def ravel(i): if using_cow: return ser return ser._values.copy() - return ser.reindex(new_ix)._values # 2 dims diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 36b08ee1df790..643a5e126bb34 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3297,3 +3297,36 @@ def test_loc_reindexing_of_empty_index(self): df.loc[Series([False] * 4, index=df.index, name=0), 0] = df[0] expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) tm.assert_frame_equal(df, expected) + + def test_loc_set_series_to_multiple_columns(self): + # GH 59933 + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")) + df.loc[0:3, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")) + df.loc[:, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) + df.loc[df["A"] > 0, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) + df.loc[:, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) + df.loc[0:4, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ) + tm.assert_frame_equal(df, expected) From bab14ffb49cbac89248ff077321a6a22b36d4bae Mon Sep 17 00:00:00 2001 From: anzor Date: Wed, 16 Oct 2024 10:00:10 +0300 Subject: [PATCH 2/8] remove condition --- pandas/core/indexing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 675e5145c5856..a7323ceb13da6 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2353,6 +2353,7 @@ def ravel(i): return i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) + aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 @@ -2370,9 +2371,7 @@ def ravel(i): # we have a frame, with multiple indexers on both axes; and a # series, so need to broadcast (see GH5206) - if len(indexer) == self.ndim and all( - is_sequence(_) or isinstance(_, slice) for _ in indexer - ): + if all(is_sequence(_) or isinstance(_, slice) for _ in indexer): ser_values = ser.reindex(obj.axes[0][indexer[0]])._values # single indexer @@ -2400,6 +2399,7 @@ def ravel(i): if using_cow: return ser return ser._values.copy() + return ser.reindex(new_ix)._values # 2 dims From 19a149c3d5847b4222ea470ff03e142eebf0446d Mon Sep 17 00:00:00 2001 From: anzor Date: Wed, 23 Oct 2024 16:59:24 +0300 Subject: [PATCH 3/8] fix case with two slices, test added --- pandas/core/indexing.py | 5 ++++- pandas/tests/indexing/test_loc.py | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a7323ceb13da6..4efcb1ecc03de 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2376,7 +2376,10 @@ def ravel(i): # single indexer if len(indexer) > 1 and not multiindex_indexer: - len_indexer = len(indexer[1]) + if isinstance(indexer[1], slice): + len_indexer = len(obj.loc[indexer[1]].axes[1]) + else: + len_indexer = len(indexer[1]) ser_values = ( np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 643a5e126bb34..b50f5c96b544d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3330,3 +3330,10 @@ def test_loc_set_series_to_multiple_columns(self): [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ) tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) + df.loc[:, "A":"C"] = Series([10, 20, 30]) + expected = DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ) + tm.assert_frame_equal(df, expected) From 27f1122bc0a0e42ba0279b02bc24707ffad99245 Mon Sep 17 00:00:00 2001 From: anzor Date: Thu, 31 Oct 2024 00:11:00 +0300 Subject: [PATCH 4/8] simpler getting len_indexer (without .loc) --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4efcb1ecc03de..44b3e623e398e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2377,7 +2377,7 @@ def ravel(i): # single indexer if len(indexer) > 1 and not multiindex_indexer: if isinstance(indexer[1], slice): - len_indexer = len(obj.loc[indexer[1]].axes[1]) + len_indexer = len(obj.axes[1][indexer[1]]) else: len_indexer = len(indexer[1]) ser_values = ( From c7a149c4437b952fb3b07d6ce79789eac21467dc Mon Sep 17 00:00:00 2001 From: anzor Date: Sun, 17 Nov 2024 21:07:19 +0300 Subject: [PATCH 5/8] Used @pytest.mark.parametrize in tests, added whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/indexing/test_loc.py | 89 ++++++++++++++++++------------- 2 files changed, 52 insertions(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7da2f968b900b..93de6807afada 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -665,7 +665,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) -- +- Fixed bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`599333`) Missing ^^^^^^^ diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 032cd6c01e441..f11f79960dcb1 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3298,42 +3298,55 @@ def test_loc_reindexing_of_empty_index(self): expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) tm.assert_frame_equal(df, expected) - def test_loc_set_series_to_multiple_columns(self): + @pytest.mark.parametrize( + "df, row_index, col_index, setting_series, expected_df", + [ + [ + DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")), + slice(0, 3), + ["A", "B", "C"], + Series([10, 20, 30]), + DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")), + ], + [ + DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")), + slice(None), + ["A", "B", "C"], + Series([10, 20, 30]), + DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")), + ], + [ + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), + [True, True, True], + ["A", "B", "C"], + Series([10, 20, 30]), + DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ), + ], + [ + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), + slice(0, 4), + ["A", "B", "C"], + Series([10, 20, 30]), + DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ), + ], + [ + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), + slice(None), + slice("A", "C"), + Series([10, 20, 30]), + DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ), + ], + ], + ) + def test_loc_set_series_to_multiple_columns( + self, df, row_index, col_index, setting_series, expected_df + ): # GH 59933 - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")) - df.loc[0:3, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")) - df.loc[:, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) - df.loc[df["A"] > 0, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame( - [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") - ) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) - df.loc[:, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame( - [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") - ) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) - df.loc[0:4, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame( - [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") - ) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) - df.loc[:, "A":"C"] = Series([10, 20, 30]) - expected = DataFrame( - [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") - ) - tm.assert_frame_equal(df, expected) + df.loc[row_index, col_index] = setting_series + tm.assert_frame_equal(df, expected_df) From fdc09049a07e30658a27ace837c85a552e9f7cb2 Mon Sep 17 00:00:00 2001 From: anzor Date: Sun, 17 Nov 2024 21:19:44 +0300 Subject: [PATCH 6/8] Remove unnecessary parameter --- pandas/tests/indexing/test_loc.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f11f79960dcb1..f9a67ae3b0d60 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3299,27 +3299,24 @@ def test_loc_reindexing_of_empty_index(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( - "df, row_index, col_index, setting_series, expected_df", + "df, row_index, col_index, expected_df", [ [ DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")), slice(0, 3), ["A", "B", "C"], - Series([10, 20, 30]), DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")), ], [ DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")), slice(None), ["A", "B", "C"], - Series([10, 20, 30]), DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")), ], [ DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), [True, True, True], ["A", "B", "C"], - Series([10, 20, 30]), DataFrame( [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ), @@ -3328,7 +3325,6 @@ def test_loc_reindexing_of_empty_index(self): DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), slice(0, 4), ["A", "B", "C"], - Series([10, 20, 30]), DataFrame( [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ), @@ -3337,7 +3333,6 @@ def test_loc_reindexing_of_empty_index(self): DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), slice(None), slice("A", "C"), - Series([10, 20, 30]), DataFrame( [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ), @@ -3345,8 +3340,8 @@ def test_loc_reindexing_of_empty_index(self): ], ) def test_loc_set_series_to_multiple_columns( - self, df, row_index, col_index, setting_series, expected_df + self, df, row_index, col_index, expected_df ): # GH 59933 - df.loc[row_index, col_index] = setting_series + df.loc[row_index, col_index] = Series([10, 20, 30]) tm.assert_frame_equal(df, expected_df) From 12d38eb2eff5560e9af236695e388ceb43e907f0 Mon Sep 17 00:00:00 2001 From: anzor Date: Mon, 30 Dec 2024 00:18:00 +0300 Subject: [PATCH 7/8] fix issue with indexing by boolean series --- pandas/core/indexing.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0cec8e63fdbb0..9d1cf69722600 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2353,11 +2353,17 @@ def _align_series( if isinstance(indexer, tuple): # flatten np.ndarray indexers + if ( + len(indexer) == 2 + and isinstance(indexer[1], np.ndarray) + and indexer[1].dtype == np.bool_ + ): + indexer = (indexer[0], np.where(indexer[1])[0]) + def ravel(i): return i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) - aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 From 163dfe31839f969274f3da5017742b55ea7b6ff5 Mon Sep 17 00:00:00 2001 From: anzor Date: Mon, 30 Dec 2024 00:25:45 +0300 Subject: [PATCH 8/8] add unit-test --- .gitignore | 2 ++ pandas/tests/indexing/test_loc.py | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/.gitignore b/.gitignore index a188e216d9f70..cdbf1384c96bd 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,8 @@ .vagrant .noseids .ipynb_checkpoints +.ipynb +.csv .tags .cache/ .vscode/ diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 94b5f47f5ca33..d7781688ae414 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3337,6 +3337,18 @@ def test_loc_reindexing_of_empty_index(self): [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ), ], + [ + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), + slice(None), + Series( + { + "A": True, + "C": False, + "B": True, + } + ), + DataFrame([[10, 10, 3], [20, 20, 6], [30, 30, 9]], columns=list("ABC")), + ], ], ) def test_loc_set_series_to_multiple_columns(