From e704057dd8db43df0c995e80071392862b77873d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 9 Jul 2021 09:31:19 -0700 Subject: [PATCH 1/3] BUG: MultiIndex.get_loc with string key on DatetimeIndex level --- pandas/core/indexes/multi.py | 19 ++++++++-- .../indexes/multi/test_partial_indexing.py | 37 +++++++++++++++++++ pandas/tests/indexing/multiindex/test_loc.py | 13 +++++++ 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8903d29782610..d220c9876aa06 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2854,7 +2854,12 @@ def _maybe_to_slice(loc): ) if keylen == self.nlevels and self.is_unique: - return self._engine.get_loc(key) + try: + return self._engine.get_loc(key) + except TypeError: + # e.g. partial string slicing + loc, _ = self.get_loc_level(key, list(range(self.nlevels))) + return loc # -- partial selection or non-unique index # break the key into 2 parts based on the lexsort_depth of the index; @@ -3153,15 +3158,21 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): if level > 0 or self._lexsort_depth == 0: # Desired level is not sorted - locs = np.array(level_codes == idx, dtype=bool, copy=False) + if isinstance(idx, slice): + # test_get_loc_partial_timestamp_multiindex + locs = (level_codes >= idx.start) & (level_codes < idx.stop) + else: + locs = np.array(level_codes == idx, dtype=bool, copy=False) if not locs.any(): # The label is present in self.levels[level] but unused: raise KeyError(key) return locs if isinstance(idx, slice): - start = idx.start - end = idx.stop + # test_get_loc_partial_timestamp_multiindex + locs = (level_codes >= idx.start) & (level_codes < idx.stop) + locs = lib.maybe_booleans_to_slice(locs) + return locs else: start = level_codes.searchsorted(idx, side="left") end = level_codes.searchsorted(idx, side="right") diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py index 286522f6b946d..cf0674c418c7f 100644 --- a/pandas/tests/indexes/multi/test_partial_indexing.py +++ b/pandas/tests/indexes/multi/test_partial_indexing.py @@ -1,3 +1,4 @@ +import numpy as np import pytest from pandas import ( @@ -45,6 +46,42 @@ def test_partial_string_matching_single_index(df): tm.assert_frame_equal(result, expected) +def test_get_loc_partial_timestamp_multiindex(df): + mi = df.index + key = ("2016-01-01", "a") + loc = mi.get_loc(key) + + expected = np.zeros(len(mi), dtype=bool) + expected[[0, 3]] = True + tm.assert_numpy_array_equal(loc, expected) + + key2 = ("2016-01-02", "a") + loc2 = mi.get_loc(key2) + expected2 = np.zeros(len(mi), dtype=bool) + expected2[[6, 9]] = True + tm.assert_numpy_array_equal(loc2, expected2) + + key3 = ("2016-01", "a") + loc3 = mi.get_loc(key3) + expected3 = np.zeros(len(mi), dtype=bool) + expected3[mi.get_level_values(1).get_loc("a")] = True + tm.assert_numpy_array_equal(loc3, expected3) + + key4 = ("2016", "a") + loc4 = mi.get_loc(key4) + expected4 = expected3 + tm.assert_numpy_array_equal(loc4, expected4) + + # non-monotonic + taker = np.arange(len(mi), dtype=np.intp) + taker[::2] = taker[::-2] + mi2 = mi.take(taker) + loc5 = mi2.get_loc(key) + expected5 = np.zeros(len(mi2), dtype=bool) + expected5[[3, 14]] = True + tm.assert_numpy_array_equal(loc5, expected5) + + def test_partial_string_timestamp_multiindex(df): # GH10331 df_swap = df.swaplevel(0, 1).sort_index() diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index bc59c51e359ae..a2feeb7ea3d09 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -736,6 +736,19 @@ def test_get_loc_datetime_index(): assert mi.get_loc("2001-01") == slice(0, 31, None) assert index.get_loc("2001-01") == slice(0, 31, None) + loc = mi[::2].get_loc("2001-01") + expected = index[::2].get_loc("2001-01") + assert loc == expected + + loc = mi.repeat(2).get_loc("2001-01") + expected = index.repeat(2).get_loc("2001-01") + assert loc == expected + + loc = mi.append(mi).get_loc("2001-01") + expected = index.append(index).get_loc("2001-01") + # TODO: standardize return type for MultiIndex.get_loc + tm.assert_numpy_array_equal(loc.nonzero()[0], expected) + def test_loc_setitem_indexer_differently_ordered(): # GH#34603 From b2597b5ac3f9fd6ca77756d39f27b5cfbe1b1fa7 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 9 Jul 2021 09:40:05 -0700 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index d114f26788f00..c241fc5cbeb4d 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -223,6 +223,7 @@ Missing MultiIndex ^^^^^^^^^^ +- Bug in :meth:`MultiIndex.get_loc` where the first level is a :class:`DatetimeIndex` and a string key is passed (:issue:`??`) - Bug in :meth:`MultiIndex.reindex` when passing a ``level`` that corresponds to an ``ExtensionDtype`` level (:issue:`42043`) - From 2d316f96e179ad472e50adfa014392017472f451 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 9 Jul 2021 09:42:06 -0700 Subject: [PATCH 3/3] GH ref --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index c241fc5cbeb4d..e86bf0b88b662 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -223,7 +223,7 @@ Missing MultiIndex ^^^^^^^^^^ -- Bug in :meth:`MultiIndex.get_loc` where the first level is a :class:`DatetimeIndex` and a string key is passed (:issue:`??`) +- Bug in :meth:`MultiIndex.get_loc` where the first level is a :class:`DatetimeIndex` and a string key is passed (:issue:`42465`) - Bug in :meth:`MultiIndex.reindex` when passing a ``level`` that corresponds to an ``ExtensionDtype`` level (:issue:`42043`) -