Skip to content

BUG: fix KeyError with list of a single, missing, element #27154

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,7 @@ Indexing
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`)
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
- Fixed a ``KeyError`` when indexing a :class:`MultiIndex` level with a list containing exactly one label, which is missing (:issue:`27148`)
- Bug which produced ``AttributeError`` on partial matching :class:`Timestamp` in a :class:`MultiIndex` (:issue:`26944`)
- Bug in :class:`Categorical` and :class:`CategoricalIndex` with :class:`Interval` values when using the ``in`` operator (``__contains__``) with objects that are not comparable to the values in the ``Interval`` (:issue:`23705`)
- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` on a :class:`DataFrame` with a single timezone-aware datetime64[ns] column incorrectly returning a scalar instead of a :class:`Series` (:issue:`27110`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1856,7 +1856,7 @@ def _getitem_axis(self, key, axis=None):

if (
not isinstance(key, tuple)
and len(key) > 1
and len(key)
and not isinstance(key[0], tuple)
):
key = tuple([key])
Expand Down
57 changes: 25 additions & 32 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,19 @@ def test_loc_multiindex_missing_label_raises(self):
with pytest.raises(KeyError, match=r"^2$"):
df.loc[2]

@pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
def test_loc_multiindex_list_missing_label(self, key, pos):
# GH 27148 - lists with missing labels do not raise:
df = DataFrame(
np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)

expected = df.iloc[pos]
result = df.loc[key]
tm.assert_frame_equal(result, expected)

def test_loc_multiindex_too_many_dims_raises(self):
# GH 14885
s = Series(
Expand Down Expand Up @@ -280,47 +293,27 @@ def convert_nested_indexer(indexer_type, keys):


@pytest.mark.parametrize(
"indexer, is_level1, expected_error",
"indexer, pos",
[
([], False, None), # empty ok
(["A"], False, None),
(["A", "D"], False, None),
(["D"], False, r"\['D'\] not in index"), # not any values found
(pd.IndexSlice[:, ["foo"]], True, None),
(pd.IndexSlice[:, ["foo", "bah"]], True, None),
([], []), # empty ok
(["A"], slice(3)),
(["A", "D"], slice(3)),
(["D", "E"], []), # no values found - fine
(["D"], []), # same, with single item list: GH 27148
(pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
(pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
],
)
def test_loc_getitem_duplicates_multiindex_missing_indexers(
indexer, is_level1, expected_error
):
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
# GH 7866
# multi-index slicing with missing indexers
idx = MultiIndex.from_product(
[["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
)
s = Series(np.arange(9, dtype="int64"), index=idx).sort_index()

if indexer == []:
expected = s.iloc[[]]
elif is_level1:
expected = Series(
[0, 3, 6],
index=MultiIndex.from_product(
[["A", "B", "C"], ["foo"]], names=["one", "two"]
),
).sort_index()
else:
exp_idx = MultiIndex.from_product(
[["A"], ["foo", "bar", "baz"]], names=["one", "two"]
)
expected = Series(np.arange(3, dtype="int64"), index=exp_idx).sort_index()

if expected_error is not None:
with pytest.raises(KeyError, match=expected_error):
s.loc[indexer]
else:
result = s.loc[indexer]
tm.assert_series_equal(result, expected)
expected = s.iloc[pos]
result = s.loc[indexer]
tm.assert_series_equal(result, expected)


def test_series_loc_getitem_fancy(multiindex_year_month_day_dataframe_random_data):
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/indexing/multiindex/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,11 @@ def test_per_axis_per_level_getitem(self):
with pytest.raises(ValueError):
df.loc[(slice(None), np.array([True, False])), :]

# ambiguous cases
# these can be interpreted in multiple ways (e.g. in this case
# as df.loc[slice(None),[1]] as well)
with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
df.loc[slice(None), [1]]
# ambiguous notation
# this is interpreted as slicing on both axes (GH #16396)
result = df.loc[slice(None), [1]]
expected = df.iloc[:, []]
tm.assert_frame_equal(result, expected)

result = df.loc[(slice(None), [1]), :]
expected = df.iloc[[0, 3]]
Expand Down