From f133354849f052cd122ec7380689b7371bf59abb Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 8 Feb 2023 16:34:07 +0000 Subject: [PATCH 1/3] BUG: bug in Index._should_fallback_to_positional --- doc/source/whatsnew/v2.0.0.rst | 2 ++ pandas/core/indexes/base.py | 4 +--- pandas/tests/frame/indexing/test_indexing.py | 18 +++++++++++++++++ pandas/tests/series/indexing/test_indexing.py | 20 +++++++++++++++++++ 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7d028935ad175..748fa98a9d657 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1201,6 +1201,8 @@ Indexing - Bug in :meth:`Series.loc` raising error for out of bounds end of slice indexer (:issue:`50161`) - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`) - Bug in :meth:`DataFrame.loc` raising ``IndexError`` when setting values for a pyarrow-backed column with a non-scalar indexer (:issue:`50085`) +- Bug in :meth:`DataFrame.__getitem__`, :meth:`Series.__getitem__`, :meth:`DataFrame.__setitem__` and :meth:`Series.__setitem__` + when indexing on indexes with extension float dtypes (:class:`Float64` & :class:`Float64`) or complex dtypes using integers (:issue:`xxxxx`) - Bug in :meth:`DataFrame.loc` modifying object when setting incompatible value with an empty indexer (:issue:`45981`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`) - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ed2e3a7499728..1262407e802be 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5681,9 +5681,7 @@ def 
_should_fallback_to_positional(self) -> bool: """ Should an integer key be treated as positional? """ - if isinstance(self.dtype, np.dtype) and self.dtype.kind in ["i", "u", "f"]: - return False - return not self._holds_integer() + return not is_numeric_dtype(self.dtype) or is_bool_dtype(self.dtype) _index_shared_docs[ "get_indexer_non_unique" diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4bb7f62fb13bd..53249e2e989ee 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -57,6 +57,15 @@ def test_getitem(self, float_frame): with pytest.raises(KeyError, match="random"): float_frame["random"] + def test_getitem_numeric_should_not_fallback_to_positional(self, any_numeric_dtype): + # GHxxxxx + dtype = any_numeric_dtype + idx = Index([1, 0, 1], dtype=dtype) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=idx) + result = df[1] + expected = DataFrame([[1, 3], [4, 6]], columns=Index([1, 1], dtype=dtype)) + tm.assert_frame_equal(result, expected, check_exact=True) + def test_getitem2(self, float_frame): df = float_frame.copy() @@ -71,6 +80,15 @@ def test_getitem2(self, float_frame): res = df["@awesome_domain"] tm.assert_numpy_array_equal(ad, res.values) + def test_setitem_numeric_should_not_fallback_to_positional(self, any_numeric_dtype): + # GHxxxxx + dtype = any_numeric_dtype + idx = Index([1, 0, 1], dtype=dtype) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=idx) + df[1] = 10 + expected = DataFrame([[10, 2, 10], [10, 5, 10]], columns=idx) + tm.assert_frame_equal(df, expected, check_exact=True) + def test_setitem_list(self, float_frame): float_frame["E"] = "foo" diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index f214ade0a31aa..8b76969018d73 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -45,6 +45,26 @@ def 
test_basic_indexing(): s[5] = 0 +def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype): + # GHxxxxx + dtype = any_numeric_dtype + idx = Index([1, 0, 1], dtype=dtype) + ser = Series(range(3), index=idx) + result = ser[1] + expected = Series([0, 2], index=Index([1, 1], dtype=dtype)) + tm.assert_series_equal(result, expected, check_exact=True) + + +def test_setitem_numeric_should_not_fallback_to_positional(any_numeric_dtype): + # GHxxxxx + dtype = any_numeric_dtype + idx = Index([1, 0, 1], dtype=dtype) + ser = Series(range(3), index=idx) + ser[1] = 10 + expected = Series([10, 1, 10], index=idx) + tm.assert_series_equal(ser, expected, check_exact=True) + + def test_basic_getitem_with_labels(datetime_series): indices = datetime_series.index[[5, 10, 15]] From a53d91582da49947cfcecef89e80a344077f3325 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 8 Feb 2023 17:08:55 +0000 Subject: [PATCH 2/3] add gh number --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/tests/frame/indexing/test_indexing.py | 4 ++-- pandas/tests/series/indexing/test_indexing.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 748fa98a9d657..b471b722277dc 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1202,7 +1202,7 @@ Indexing - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`) - Bug in :meth:`DataFrame.loc` raising ``IndexError`` when setting values for a pyarrow-backed column with a non-scalar indexer (:issue:`50085`) - Bug in :meth:`DataFrame.__getitem__`, :meth:`Series.__getitem__`, :meth:`DataFrame.__setitem__` and :meth:`Series.__setitem__` - when indexing on indexes with extension float dtypes (:class:`Float64` & :class:`Float64`) or complex dtypes using integers (:issue:`xxxxx`) + when indexing on indexes with extension float dtypes (:class:`Float32` & :class:`Float64`) 
or complex dtypes using integers (:issue:`51053`) - Bug in :meth:`DataFrame.loc` modifying object when setting incompatible value with an empty indexer (:issue:`45981`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`) - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 53249e2e989ee..41706567d1f4f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -58,7 +58,7 @@ def test_getitem(self, float_frame): float_frame["random"] def test_getitem_numeric_should_not_fallback_to_positional(self, any_numeric_dtype): - # GHxxxxx + # GH51053 dtype = any_numeric_dtype idx = Index([1, 0, 1], dtype=dtype) df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=idx) @@ -81,7 +81,7 @@ def test_getitem2(self, float_frame): tm.assert_numpy_array_equal(ad, res.values) def test_setitem_numeric_should_not_fallback_to_positional(self, any_numeric_dtype): - # GHxxxxx + # GH51053 dtype = any_numeric_dtype idx = Index([1, 0, 1], dtype=dtype) df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=idx) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 8b76969018d73..a8290f472cd7c 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -46,7 +46,7 @@ def test_basic_indexing(): def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype): - # GHxxxxx + # GH51053 dtype = any_numeric_dtype idx = Index([1, 0, 1], dtype=dtype) ser = Series(range(3), index=idx) @@ -56,7 +56,7 @@ def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype): def 
test_setitem_numeric_should_not_fallback_to_positional(any_numeric_dtype): - # GHxxxxx + # GH51053 dtype = any_numeric_dtype idx = Index([1, 0, 1], dtype=dtype) ser = Series(range(3), index=idx) From 9b594962c30bb40e1f3db6fa08f6909324f4fbe0 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Fri, 10 Feb 2023 06:53:11 +0000 Subject: [PATCH 3/3] fix perf --- pandas/core/indexes/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1262407e802be..76aab53bd1659 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5681,7 +5681,12 @@ def _should_fallback_to_positional(self) -> bool: """ Should an integer key be treated as positional? """ - return not is_numeric_dtype(self.dtype) or is_bool_dtype(self.dtype) + return self.inferred_type not in { + "integer", + "mixed-integer", + "floating", + "complex", + } _index_shared_docs[ "get_indexer_non_unique"