From 5dc275799d68212bbae94c7431e25b08c7d9764d Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 7 May 2021 23:50:55 +0200 Subject: [PATCH 1/4] BUG: loc casting to float for scalar with MultiIndex df --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/indexing.py | 16 ++++++---------- pandas/tests/indexing/multiindex/test_loc.py | 9 +++++++++ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index cf3dd1b0e3226..6781c52293e0b 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -788,6 +788,7 @@ Indexing - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`) - Bug in :meth:`DataFrame.__setitem__` and :meth:`DataFrame.iloc.__setitem__` raising ``ValueError`` when trying to index with a row-slice and setting a list as values (:issue:`40440`) - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`) +- Bug in :meth:`DataFrame.loc.__getitem__` with :class:`MultiIndex` casting to float when at least one column has float dtype and we retrieve a scalar (:issue:`41369`) - Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`) - Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b267472eba573..2011be9f6e77b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -888,26 +888,22 @@ def _getitem_nested_tuple(self, tup: tuple): # handle the multi-axis by taking sections and reducing # this is iterative obj = self.obj - axis = 0 - for key in tup: + # GH#41369 Loop in reverse order to avoid dtype conversion when converting df + # row to a series + axis = len(tup) - 1 + for key in tup[::-1]: 
if com.is_null_slice(key): - axis += 1 + axis -= 1 continue - current_ndim = obj.ndim obj = getattr(obj, self.name)._getitem_axis(key, axis=axis) - axis += 1 + axis -= 1 # if we have a scalar, we are done if is_scalar(obj) or not hasattr(obj, "ndim"): break - # has the dim of the obj changed? - # GH 7199 - if obj.ndim < current_ndim: - axis -= 1 - return obj def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 96d2c246dd0ee..fce7ae9086771 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -776,3 +776,12 @@ def test_loc_getitem_drops_levels_for_one_row_dataframe(): result = ser.loc["x", :, "z"] expected = Series([0], index=Index(["y"], name="b")) tm.assert_series_equal(result, expected) + + +def test_loc_get_scalar_casting_to_float(): + # GH#41369 + df = DataFrame( + {"a": 1.0, "b": 2}, index=MultiIndex.from_arrays([[3], [4]], names=["c", "d"]) + ) + result = df.loc[(3, 4), "b"] + assert result == 2 From 858299e2223928e49cc6f5b8f875ec4abafe80d6 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 10 May 2021 00:52:25 +0200 Subject: [PATCH 2/4] Adjust comment --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2011be9f6e77b..4cab796a74de3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -888,8 +888,8 @@ def _getitem_nested_tuple(self, tup: tuple): # handle the multi-axis by taking sections and reducing # this is iterative obj = self.obj - # GH#41369 Loop in reverse order to avoid dtype conversion when converting df - # row to a series + # GH#41369 Loop in reverse order ensures indexing along columns before rows + # which selects only necessary blocks which avoids dtype conversion if possible axis = len(tup) - 1 for key in tup[::-1]: From 
01dde93575873785209b2c4daea5e570c1d0d490 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 12 May 2021 11:16:00 +0200 Subject: [PATCH 3/4] Add iloc example --- pandas/tests/indexing/multiindex/test_loc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 568926d01efdc..cba809092c440 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -797,3 +797,5 @@ def test_loc_get_scalar_casting_to_float(): ) result = df.loc[(3, 4), "b"] assert result == 2 + result = df.loc[[(3, 4)], "b"].iloc[0] + assert result == 2 From 832c236261595a15ee0293017119fa7f2fcb4dab Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 25 May 2021 10:41:01 +0200 Subject: [PATCH 4/4] Add isinstance check --- pandas/tests/indexing/multiindex/test_loc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 3a3bae24ebb62..f9e2d1280b33a 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -840,5 +840,7 @@ def test_loc_get_scalar_casting_to_float(): ) result = df.loc[(3, 4), "b"] assert result == 2 + assert isinstance(result, np.int64) result = df.loc[[(3, 4)], "b"].iloc[0] assert result == 2 + assert isinstance(result, np.int64)