Skip to content

Commit 388ff01

Browse files
authored
Make get_loc with nan for FloatIndex consistent with other index types (#39382)
1 parent 8460340 commit 388ff01

File tree

8 files changed

+31
-31
lines changed

8 files changed

+31
-31
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ Indexing
337337
- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)
338338
- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)
339339
- Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrectly casting the datetime64 values to integers (:issue:`39266`)
340+
- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when ``method`` is specified for a ``NaN`` key and ``NaN`` is not in the :class:`Index` (:issue:`39382`)
340341
- Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`)
341342
- Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`)
342343

pandas/_libs/index_class_helper.pxi.in

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,14 @@ cdef class {{name}}Engine(IndexEngine):
5757
with warnings.catch_warnings():
5858
# e.g. if values is float64 and `val` is a str, suppress warning
5959
warnings.filterwarnings("ignore", category=FutureWarning)
60+
{{if name in {'Float64', 'Float32'} }}
61+
if util.is_nan(val):
62+
indexer = np.isnan(values)
63+
else:
64+
indexer = values == val
65+
{{else}}
6066
indexer = values == val
67+
{{endif}}
6168
except TypeError:
6269
# if the equality above returns a bool, cython will raise TypeError
6370
# when trying to cast it to ndarray

pandas/core/indexes/base.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2292,14 +2292,6 @@ def _isnan(self):
22922292
values.fill(False)
22932293
return values
22942294

2295-
@cache_readonly
2296-
@final
2297-
def _nan_idxs(self):
2298-
if self._can_hold_na:
2299-
return self._isnan.nonzero()[0]
2300-
else:
2301-
return np.array([], dtype=np.intp)
2302-
23032295
@cache_readonly
23042296
def hasnans(self) -> bool:
23052297
"""
@@ -3224,6 +3216,9 @@ def get_loc(self, key, method=None, tolerance=None):
32243216
except KeyError as err:
32253217
raise KeyError(key) from err
32263218

3219+
if is_scalar(key) and isna(key) and not self.hasnans:
3220+
raise KeyError(key)
3221+
32273222
if tolerance is not None:
32283223
tolerance = self._convert_tolerance(tolerance, np.asarray(key))
32293224

pandas/core/indexes/numeric.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -341,15 +341,6 @@ def get_loc(self, key, method=None, tolerance=None):
341341
if is_bool(key):
342342
# Catch this to avoid accidentally casting to 1.0
343343
raise KeyError(key)
344-
345-
if is_float(key) and np.isnan(key):
346-
nan_idxs = self._nan_idxs
347-
if not len(nan_idxs):
348-
raise KeyError(key)
349-
elif len(nan_idxs) == 1:
350-
return nan_idxs[0]
351-
return nan_idxs
352-
353344
return super().get_loc(key, method=method, tolerance=tolerance)
354345

355346
# ----------------------------------------------------------------

pandas/tests/indexes/datetimes/test_ops.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,16 +316,13 @@ def test_nat(self, tz_naive_fixture):
316316
idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
317317
assert idx._can_hold_na
318318

319-
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
320319
assert idx.hasnans is False
321-
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))
322320

323321
idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
324322
assert idx._can_hold_na
325323

326324
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
327325
assert idx.hasnans is True
328-
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))
329326

330327
@pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
331328
@pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])

pandas/tests/indexes/numeric/test_indexing.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
import numpy as np
22
import pytest
33

4-
from pandas import Float64Index, Index, Int64Index, RangeIndex, Series, UInt64Index
4+
from pandas import (
5+
Float64Index,
6+
Index,
7+
Int64Index,
8+
RangeIndex,
9+
Series,
10+
Timestamp,
11+
UInt64Index,
12+
)
513
import pandas._testing as tm
614

715

@@ -102,13 +110,10 @@ def test_get_loc_na(self):
102110
idx = Float64Index([np.nan, 1, np.nan])
103111
assert idx.get_loc(1) == 1
104112

105-
# FIXME: dont leave commented-out
106113
# representable by slice [0:2:2]
107-
# pytest.raises(KeyError, idx.slice_locs, np.nan)
108-
sliced = idx.slice_locs(np.nan)
109-
assert isinstance(sliced, tuple)
110-
assert sliced == (0, 3)
111-
114+
msg = "'Cannot get left slice bound for non-unique label: nan'"
115+
with pytest.raises(KeyError, match=msg):
116+
idx.slice_locs(np.nan)
112117
# not representable by slice
113118
idx = Float64Index([np.nan, 1, np.nan, np.nan])
114119
assert idx.get_loc(1) == 1
@@ -128,6 +133,14 @@ def test_get_loc_missing_nan(self):
128133
# listlike/non-hashable raises TypeError
129134
idx.get_loc([np.nan])
130135

136+
@pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]])
137+
@pytest.mark.parametrize("method", ["nearest", "pad", "backfill"])
138+
def test_get_loc_float_index_nan_with_method(self, vals, method):
139+
# GH#39382
140+
idx = Index(vals)
141+
with pytest.raises(KeyError, match="nan"):
142+
idx.get_loc(np.nan, method=method)
143+
131144

132145
class TestGetIndexer:
133146
def test_get_indexer(self):

pandas/tests/indexes/period/test_ops.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,14 +278,12 @@ def test_nat(self):
278278

279279
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
280280
assert idx.hasnans is False
281-
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))
282281

283282
idx = PeriodIndex(["2011-01-01", "NaT"], freq="D")
284283
assert idx._can_hold_na
285284

286285
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
287286
assert idx.hasnans is True
288-
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))
289287

290288
def test_freq_setter_deprecated(self):
291289
# GH 20678

pandas/tests/indexes/timedeltas/test_ops.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,12 @@ def test_nat(self):
217217

218218
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
219219
assert idx.hasnans is False
220-
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))
221220

222221
idx = TimedeltaIndex(["1 days", "NaT"])
223222
assert idx._can_hold_na
224223

225224
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
226225
assert idx.hasnans is True
227-
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))
228226

229227
@pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
230228
@pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])

0 commit comments

Comments
 (0)