From dc514faeb96b1b7b1fe8e8e296b7a7a423ed9d1e Mon Sep 17 00:00:00 2001 From: eshaready Date: Wed, 17 Apr 2024 17:26:18 -0400 Subject: [PATCH 1/4] fixed the issue by making sure setting an item takes into account the axis argument if supplied to loc. added a test checking for this based on the github issue as well --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/indexing.py | 8 +++++--- pandas/tests/indexing/multiindex/test_loc.py | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7a4f709e56104..655f57fff6c35 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -397,7 +397,7 @@ Missing MultiIndex ^^^^^^^^^^ -- +- Bug in :func:`_LocationIndexer.__setitem__` where columns were added when they shouldn't be when using :func:`DataFrame.loc` with a multi index and specified axis (:issue:`58116`) - I/O diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c9b502add21e0..4061b7f6d513e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -757,7 +757,7 @@ def _get_setitem_indexer(self, key): """ if self.name == "loc": # always holds here bc iloc overrides _get_setitem_indexer - self._ensure_listlike_indexer(key) + self._ensure_listlike_indexer(key, axis=self.axis) if isinstance(key, tuple): for x in key: @@ -857,8 +857,10 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: if isinstance(key, tuple) and len(key) > 1: # key may be a tuple if we are .loc # if length of key is > 1 set key to column part - key = key[column_axis] - axis = column_axis + # unless axis is already specified! 
+ if axis is None: + axis = column_axis + key = key[axis] if ( axis == column_axis diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 67b9ddebfb8bf..c3373a9e2a452 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -381,6 +381,22 @@ def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value): ) tm.assert_frame_equal(df, expected) + def test_multiindex_setitem_axis_set(self): + # GH#58116 + dates = pd.date_range("2001-01-01", freq="D", periods=2) + ids = ["i1", "i2", "i3"] + index = pd.MultiIndex.from_product([dates, ids], names=["date", "identifier"]) + df = pd.DataFrame(0.0, index=index, columns=["A", "B"]) + df.loc(axis=0)["2001-01-01", ["i1", "i3"]] = None + + expected = pd.DataFrame( + [[None, None], [0.0, 0.0], [None, None], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]], + index = index, + columns = ["A", "B"], + ) + + tm.assert_frame_equal(df, expected) + def test_sorted_multiindex_after_union(self): # GH#44752 midx = MultiIndex.from_product( From 18fecdd20faad2078546173a2950770a16c37285 Mon Sep 17 00:00:00 2001 From: eshaready Date: Wed, 17 Apr 2024 19:29:57 -0400 Subject: [PATCH 2/4] Running pre commit checks --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4061b7f6d513e..b3ae53272cae4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -857,7 +857,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: if isinstance(key, tuple) and len(key) > 1: # key may be a tuple if we are .loc # if length of key is > 1 set key to column part - # unless axis is already specified! 
+ # unless axis is already specified, then go with that if axis is None: axis = column_axis key = key[axis] From a9aa7c0f8f45e2642fde51c1b48288cc02e1e1fd Mon Sep 17 00:00:00 2001 From: eshaready Date: Wed, 17 Apr 2024 19:33:24 -0400 Subject: [PATCH 3/4] Pre commit checks --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 655f57fff6c35..9217d24f789a8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -397,7 +397,7 @@ Missing MultiIndex ^^^^^^^^^^ -- Bug in :func:`_LocationIndexer.__setitem__` where columns were added when they shouldn't be when using :func:`DataFrame.loc` with a multi index and specified axis (:issue:`58116`) +- Bug in :func:`_LocationIndexer.__setitem__` where columns were added when they shouldn't be when using :func:`DataFrame.loc` with a multi index and specified axis (:issue:`58116`) - I/O diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index c3373a9e2a452..d482d20591a8a 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -385,14 +385,21 @@ def test_multiindex_setitem_axis_set(self): # GH#58116 dates = pd.date_range("2001-01-01", freq="D", periods=2) ids = ["i1", "i2", "i3"] - index = pd.MultiIndex.from_product([dates, ids], names=["date", "identifier"]) - df = pd.DataFrame(0.0, index=index, columns=["A", "B"]) + index = MultiIndex.from_product([dates, ids], names=["date", "identifier"]) + df = DataFrame(0.0, index=index, columns=["A", "B"]) df.loc(axis=0)["2001-01-01", ["i1", "i3"]] = None - expected = pd.DataFrame( - [[None, None], [0.0, 0.0], [None, None], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]], - index = index, - columns = ["A", "B"], + expected = DataFrame( + [ + [None, None], + [0.0, 0.0], + [None, 
None], + [0.0, 0.0], + [0.0, 0.0], + [0.0, 0.0], + ], + index=index, + columns=["A", "B"], ) tm.assert_frame_equal(df, expected) From 00098ef681657cbd79b7e42f0abd77bc2eccb65e Mon Sep 17 00:00:00 2001 From: eshaready <91164675+eshaready@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:17:15 -0400 Subject: [PATCH 4/4] Update doc/source/whatsnew/v3.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9217d24f789a8..177a38b526c6e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -397,7 +397,7 @@ Missing MultiIndex ^^^^^^^^^^ -- Bug in :func:`_LocationIndexer.__setitem__` where columns were added when they shouldn't be when using :func:`DataFrame.loc` with a multi index and specified axis (:issue:`58116`) +- Bug in :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` where setting a value added extra columns (:issue:`58116`) - I/O