From 1cf6630b83661f138e5c3e9dc3d260ae2e765461 Mon Sep 17 00:00:00 2001 From: benjamindonnachie <83379521+benjamindonnachie@users.noreply.github.com> Date: Sat, 17 Aug 2024 14:03:26 +0100 Subject: [PATCH 1/3] BUG: Fix OverflowError in lib.maybe_indices_to_slice() This fixes this error when slicing massive dataframes: Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/opt/anaconda3/lib/python3.12/site-packages/pandas/core/frame.py", line 4093, in __getitem__ return self._getitem_bool_array(key) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/opt/anaconda3/lib/python3.12/site-packages/pandas/core/frame.py", line 4155, in _getitem_bool_array return self._take_with_is_copy(indexer, axis=0) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/opt/anaconda3/lib/python3.12/site-packages/pandas/core/generic.py", line 4153, in _take_with_is_copy result = self.take(indices=indices, axis=axis) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/opt/anaconda3/lib/python3.12/site-packages/pandas/core/generic.py", line 4133, in take new_data = self._mgr.take( ^^^^^^^^^^^^^^^ File "/opt/anaconda3/lib/python3.12/site-packages/pandas/core/internals/managers.py", line 893, in take new_labels = self.axes[axis].take(indexer) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/opt/anaconda3/lib/python3.12/site-packages/pandas/core/indexes/datetimelike.py", line 839, in take maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "lib.pyx", line 522, in pandas._libs.lib.maybe_indices_to_slice OverflowError: value too large to convert to int --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/lib.pyi | 2 +- pandas/_libs/lib.pyx | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9fab1d12fc6a5..e238244c3927a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -699,6 +699,7 @@ Indexing - Bug in :meth:`Index.get_indexer` and similar methods 
when ``NaN`` is located at or after position 128 (:issue:`58924`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) +- Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`) Missing ^^^^^^^ diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index daaaacee3487d..ebdf78d04379b 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -161,7 +161,7 @@ def maybe_booleans_to_slice( ) -> slice | npt.NDArray[np.uint8]: ... def maybe_indices_to_slice( indices: npt.NDArray[np.intp], - max_len: int, + max_len: np.intp, ) -> slice | npt.NDArray[np.intp]: ... def is_all_arraylike(obj: list) -> bool: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 3c509a3eae11a..63d70f4ce59c9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -502,7 +502,7 @@ def has_only_ints_or_nan(const floating[:] arr) -> bool: return True -def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len): +def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, intp_t max_len): cdef: Py_ssize_t i, n = len(indices) intp_t k, vstart, vlast, v From 0569e3cc79032f4d3ac44d3c51b4accccc6cfd88 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Fri, 7 Mar 2025 12:55:13 -0500 Subject: [PATCH 2/3] Sort whatsnew entries --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e238244c3927a..e2e4144f3323d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -694,12 +694,12 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) +- Bug in 
:meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) - Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) -- Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`) Missing ^^^^^^^ From b120c2cd81bd621a962f2c7245b09a2e5ab7ee69 Mon Sep 17 00:00:00 2001 From: Scott Talbert Date: Fri, 7 Mar 2025 14:43:39 -0500 Subject: [PATCH 3/3] Set type hint back to int --- pandas/_libs/lib.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index ebdf78d04379b..daaaacee3487d 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -161,7 +161,7 @@ def maybe_booleans_to_slice( ) -> slice | npt.NDArray[np.uint8]: ... def maybe_indices_to_slice( indices: npt.NDArray[np.intp], - max_len: np.intp, + max_len: int, ) -> slice | npt.NDArray[np.intp]: ... def is_all_arraylike(obj: list) -> bool: ...