diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index ab242ece98181..d06173d0ab6ef 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1044,6 +1044,7 @@ Indexing - Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). - Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) - Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`) +- Fixed a ``KeyError`` when indexing a :class:`MultiIndex` level with a list containing exactly one label, which is missing (:issue:`27148`) - Bug which produced ``AttributeError`` on partial matching :class:`Timestamp` in a :class:`MultiIndex` (:issue:`26944`) - Bug in :class:`Categorical` and :class:`CategoricalIndex` with :class:`Interval` values when using the ``in`` operator (``__contains``) with objects that are not comparable to the values in the ``Interval`` (:issue:`23705`) - Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` on a :class:`DataFrame` with a single timezone-aware datetime64[ns] column incorrectly returning a scalar instead of a :class:`Series` (:issue:`27110`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ccc3a027af70d..0cac80e559715 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1856,7 +1856,7 @@ def _getitem_axis(self, key, axis=None): if ( not isinstance(key, tuple) - and len(key) > 1 + and len(key) and not isinstance(key[0], tuple) ): key = tuple([key]) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 9188adc7d6e93..d92cc00af6fce 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -130,6 +130,19 @@ def 
test_loc_multiindex_missing_label_raises(self): with pytest.raises(KeyError, match=r"^2$"): df.loc[2] + @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])]) + def test_loc_multiindex_list_missing_label(self, key, pos): + # GH 27148 - lists with missing labels do not raise: + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + expected = df.iloc[pos] + result = df.loc[key] + tm.assert_frame_equal(result, expected) + def test_loc_multiindex_too_many_dims_raises(self): # GH 14885 s = Series( @@ -280,47 +293,27 @@ def convert_nested_indexer(indexer_type, keys): @pytest.mark.parametrize( - "indexer, is_level1, expected_error", + "indexer, pos", [ - ([], False, None), # empty ok - (["A"], False, None), - (["A", "D"], False, None), - (["D"], False, r"\['D'\] not in index"), # not any values found - (pd.IndexSlice[:, ["foo"]], True, None), - (pd.IndexSlice[:, ["foo", "bah"]], True, None), + ([], []), # empty ok + (["A"], slice(3)), + (["A", "D"], slice(3)), + (["D", "E"], []), # no values found - fine + (["D"], []), # same, with single item list: GH 27148 + (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)), + (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)), ], ) -def test_loc_getitem_duplicates_multiindex_missing_indexers( - indexer, is_level1, expected_error -): +def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos): # GH 7866 # multi-index slicing with missing indexers idx = MultiIndex.from_product( [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"] ) s = Series(np.arange(9, dtype="int64"), index=idx).sort_index() - - if indexer == []: - expected = s.iloc[[]] - elif is_level1: - expected = Series( - [0, 3, 6], - index=MultiIndex.from_product( - [["A", "B", "C"], ["foo"]], names=["one", "two"] - ), - ).sort_index() - else: - exp_idx = MultiIndex.from_product( - [["A"], ["foo", "bar", "baz"]], names=["one", "two"] - ) - expected = 
Series(np.arange(3, dtype="int64"), index=exp_idx).sort_index() - - if expected_error is not None: - with pytest.raises(KeyError, match=expected_error): - s.loc[indexer] - else: - result = s.loc[indexer] - tm.assert_series_equal(result, expected) + expected = s.iloc[pos] + result = s.loc[indexer] + tm.assert_series_equal(result, expected) def test_series_loc_getitem_fancy(multiindex_year_month_day_dataframe_random_data): diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 421ca71428bcc..692a86aa1a338 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -117,11 +117,11 @@ def test_per_axis_per_level_getitem(self): with pytest.raises(ValueError): df.loc[(slice(None), np.array([True, False])), :] - # ambiguous cases - # these can be multiply interpreted (e.g. in this case - # as df.loc[slice(None),[1]] as well - with pytest.raises(KeyError, match=r"'\[1\] not in index'"): - df.loc[slice(None), [1]] + # ambiguous notation + # this is interpreted as slicing on both axes (GH #16396) + result = df.loc[slice(None), [1]] + expected = df.iloc[:, []] + tm.assert_frame_equal(result, expected) result = df.loc[(slice(None), [1]), :] expected = df.iloc[[0, 3]]