diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index bed64efc690ec..69d89e2f32203 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -42,6 +42,7 @@ from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_insert_loc from pandas.core.dtypes.cast import ( astype_nansafe, @@ -81,7 +82,10 @@ extract_array, sanitize_array, ) -from pandas.core.indexers import check_array_indexer +from pandas.core.indexers import ( + check_array_indexer, + unpack_tuple_and_ellipses, +) from pandas.core.missing import interpolate_2d from pandas.core.nanops import check_below_min_count import pandas.core.ops as ops @@ -878,16 +882,13 @@ def __getitem__( ) -> SparseArrayT | Any: if isinstance(key, tuple): - if len(key) > 1: - if key[0] is Ellipsis: - key = key[1:] - elif key[-1] is Ellipsis: - key = key[:-1] - if len(key) > 1: - raise IndexError("too many indices for array.") - if key[0] is Ellipsis: + key = unpack_tuple_and_ellipses(key) + # Non-overlapping identity check (left operand type: + # "Union[Union[Union[int, integer[Any]], Union[slice, List[int], + # ndarray[Any, Any]]], Tuple[Union[int, ellipsis], ...]]", + # right operand type: "ellipsis") + if key is Ellipsis: # type: ignore[comparison-overlap] raise ValueError("Cannot slice with Ellipsis") - key = key[0] if is_integer(key): return self._get_val_at(key) @@ -952,12 +953,7 @@ def __getitem__( return type(self)(data_slice, kind=self.kind) def _get_val_at(self, loc): - n = len(self) - if loc < 0: - loc += n - - if loc >= n or loc < 0: - raise IndexError("Out of bounds access") + loc = validate_insert_loc(loc, len(self)) sp_loc = self.sp_index.lookup(loc) if sp_loc == -1: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 4e3bd05d2cc8d..e6058ad9dbaf2 100644 --- 
def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n].

    A single leading or trailing ``Ellipsis`` is dropped when the tuple has
    more than one element; the tuple must then hold exactly one indexer,
    which is returned.

    Parameters
    ----------
    item : tuple
        Key received by ``__getitem__``, e.g. ``(..., n)``, ``(n, ...)`` or
        ``(n,)``.

    Returns
    -------
    object
        The sole remaining element of ``item``. A 1-tuple is unpacked as-is,
        so ``(Ellipsis,)`` returns ``Ellipsis`` itself and the caller decides
        how to treat it.

    Raises
    ------
    IndexError
        If ``item`` is empty, or if more than one element remains after an
        Ellipsis has been dropped.
    """
    if not item:
        # Without this guard the empty tuple reaches ``item[0]`` and fails
        # with an unrelated-looking "tuple index out of range". Keep the
        # exception type, but report the actual problem.
        raise IndexError("an empty tuple is not a valid index for a 1D array.")

    if len(item) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        if item[0] is Ellipsis:
            item = item[1:]
        elif item[-1] is Ellipsis:
            item = item[:-1]

    if len(item) > 1:
        raise IndexError("too many indices for array.")

    return item[0]
a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 07ae7511bb333..96021bfa18fb7 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -258,7 +258,7 @@ def test_get_item(self): assert self.zarr[2] == 1 assert self.zarr[7] == 5 - errmsg = re.compile("bounds") + errmsg = "must be an integer between -10 and 10" with pytest.raises(IndexError, match=errmsg): self.arr[11] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 2eef828288e59..e3e5e092f143b 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -40,6 +40,7 @@ ExtensionDtype, ) from pandas.api.types import is_bool_dtype +from pandas.core.indexers import unpack_tuple_and_ellipses class JSONDtype(ExtensionDtype): @@ -86,14 +87,7 @@ def _from_factorized(cls, values, original): def __getitem__(self, item): if isinstance(item, tuple): - if len(item) > 1: - if item[0] is Ellipsis: - item = item[1:] - elif item[-1] is Ellipsis: - item = item[:-1] - if len(item) > 1: - raise IndexError("too many indices for array.") - item = item[0] + item = unpack_tuple_and_ellipses(item) if isinstance(item, numbers.Integral): return self.data[item]