From f4364a611c28add28553ea23ca5af507447b68a1 Mon Sep 17 00:00:00 2001 From: Younggun Kim Date: Sun, 26 Jul 2015 23:23:24 +0900 Subject: [PATCH] BUG: #10645 in using MultiIndex.__contains__ --- doc/source/whatsnew/v0.17.0.txt | 1 + pandas/index.pyx | 5 +++++ pandas/tests/test_index.py | 8 ++++++++ 3 files changed, 14 insertions(+) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index face3a1002bae..ccd2cd2f2d289 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -398,3 +398,4 @@ Bug Fixes - Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`) - Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) +- Bug in ``MultiIndex.__contains__`` raising an ``IndexError`` for large multiindices (:issue:`10645`) diff --git a/pandas/index.pyx b/pandas/index.pyx index 1678e3b280ee5..9259ed922eb13 100644 --- a/pandas/index.pyx +++ b/pandas/index.pyx @@ -143,6 +143,11 @@ cdef class IndexEngine: return self._get_loc_duplicates(val) values = self._get_index_values() loc = _bin_search(values, val) # .searchsorted(val, side='left') + + # GH10645 + if len(values) <= loc or 0 > loc: + raise KeyError(val) + if util.get_value_at(values, loc) != val: raise KeyError(val) return loc diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 0b592368e2a1c..b0a60ee558123 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1220,6 +1220,14 @@ def test_get_loc(self): with tm.assertRaises(TypeError): idx.get_loc('a', method='nearest') + def test_get_loc_keyerror(self): + # GH10645 + mi = pd.MultiIndex.from_arrays([range(100), range(100)]) + self.assertRaises(KeyError, lambda: mi.get_loc((1000001, 0))) + + mi = pd.MultiIndex.from_arrays([range(1000000), range(1000000)]) + self.assertRaises(KeyError, lambda: mi.get_loc((1000001, 0))) + def test_slice_locs(self): for dtype 
in [int, float]: idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))