From b0f1373e9873fae9fd55c8b08ac5e25c104e92ee Mon Sep 17 00:00:00 2001 From: ssche Date: Fri, 16 Jun 2023 09:59:49 +0200 Subject: [PATCH 1/2] Fixed #52234 * Copied fix from https://github.com/ltauro/pandas/tree/nan-not-in-index and applied review comments --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/_libs/index.pyx | 6 ++++-- .../tests/indexes/base_class/test_indexing.py | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 4dbaf65c9e7ab..cd7961234f0df 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -422,6 +422,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__setitem__` losing dtype when setting a :class:`DataFrame` into duplicated columns (:issue:`53143`) - Bug in :meth:`DataFrame.__setitem__` with a boolean mask and :meth:`DataFrame.putmask` with mixed non-numeric dtypes and a value other than ``NaN`` incorrectly raising ``TypeError`` (:issue:`53291`) +- Bug when using ``nan`` to index (:issue:`52234`) Missing ^^^^^^^ diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 18e0bd4014ac8..872c98e91a609 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -360,6 +360,8 @@ cdef class IndexEngine: values = self.values stargets = set(targets) + na_in_stargets = any(checknull(t) for t in stargets) + n = len(values) n_t = len(targets) if n > 10_000: @@ -374,7 +376,7 @@ cdef class IndexEngine: if ( stargets and len(stargets) < 5 and - not any([checknull(t) for t in stargets]) and + not na_in_stargets and self.is_monotonic_increasing ): # if there are few enough stargets and the index is monotonically @@ -396,7 +398,7 @@ cdef class IndexEngine: # otherwise, map by iterating through all items in the index # short-circuit na check - if values.dtype == object: + if na_in_stargets: check_na_values = True # keep track of nas in values found_nas = set() diff --git 
a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py index 070fec47b90e6..51aa7e26b3911 100644 --- a/pandas/tests/indexes/base_class/test_indexing.py +++ b/pandas/tests/indexes/base_class/test_indexing.py @@ -37,6 +37,24 @@ def test_get_indexer_non_unique_dtype_mismatch(self): tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes) tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing) + @pytest.mark.parametrize( + "idx_values,idx_non_unique", + [ + ([np.nan, 100, 200, 100], [np.nan, 100]), + ([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]), + ], + ) + def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique): + indexes, missing = Index(idx_values).get_indexer_non_unique(Index([np.nan])) + tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing) + + indexes, missing = Index(idx_values).get_indexer_non_unique( + Index(idx_non_unique) + ) + tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing) + class TestGetLoc: @pytest.mark.slow # to_flat_index takes a while From 94a5b0dad5d2b97ee6f3baf0e51c1947d56e3421 Mon Sep 17 00:00:00 2001 From: Sven Date: Tue, 20 Jun 2023 20:40:05 +0200 Subject: [PATCH 2/2] Update doc/source/whatsnew/v2.1.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index cd7961234f0df..430b92e2e3025 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -422,7 +422,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__setitem__` losing dtype when setting a :class:`DataFrame` into duplicated columns (:issue:`53143`) - Bug in :meth:`DataFrame.__setitem__` with a boolean mask and 
:meth:`DataFrame.putmask` with mixed non-numeric dtypes and a value other than ``NaN`` incorrectly raising ``TypeError`` (:issue:`53291`) -- Bug when using ``nan`` to index (:issue:`52234`) +- Bug in :meth:`Index.get_indexer_non_unique` failing to locate ``nan`` in a non-object-dtype :class:`Index` when ``nan`` is the only element of the target (:issue:`52234`) Missing ^^^^^^^