diff --git a/pandas/index.pyx b/pandas/index.pyx
index dad2b26e13412..25e6f35ad2a0d 100644
--- a/pandas/index.pyx
+++ b/pandas/index.pyx
@@ -143,6 +143,8 @@ cdef class IndexEngine:
                 return self._get_loc_duplicates(val)
             values = self._get_index_values()
             loc = _bin_search(values, val) # .searchsorted(val, side='left')
+            if loc >= len(values):
+                raise KeyError(val)
             if util.get_value_at(values, loc) != val:
                 raise KeyError(val)
             return loc
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 390dbdd76a266..105ce37c1c51d 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -2052,6 +2052,23 @@ def test_equals_operator(self):
         # GH9785
         self.assertTrue((self.index == self.index).all())
 
+    def test_large_multiindex_error(self):
+        # GH12527
+        df_below_1000000 = pd.DataFrame(
+            1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]),
+            columns=['dest'])
+        with assertRaises(KeyError):
+            df_below_1000000.loc[(-1, 0), 'dest']
+        with assertRaises(KeyError):
+            df_below_1000000.loc[(3, 0), 'dest']
+        df_above_1000000 = pd.DataFrame(
+            1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]),
+            columns=['dest'])
+        with assertRaises(KeyError):
+            df_above_1000000.loc[(-1, 0), 'dest']
+        with assertRaises(KeyError):
+            df_above_1000000.loc[(3, 0), 'dest']
+
     def test_partial_string_timestamp_multiindex(self):
         # GH10331
         dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H')