diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index df35a2c08a25e..8723b1b766485 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -30,7 +30,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6275fe39558a3..78ebfad4e3ae9 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1557,6 +1557,16 @@ def _slice(self, slicer): return self.values[slicer] + @final + def getitem_block_index(self, slicer: slice) -> ExtensionBlock: + """ + Perform __getitem__-like specialized to slicing along index. + """ + # GH#42787 in principle this is equivalent to values[..., slicer], but we don't + # require subclasses of ExtensionArray to support that form (for now). + new_values = self.values[slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + def fillna( self, value, limit=None, inplace: bool = False, downcast=None ) -> list[Block]: diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 96833a2e49fa1..ac181af7875b5 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -425,3 +425,23 @@ def test_item(self, data): with pytest.raises(ValueError, match=msg): s.item() + + def test_ellipsis_index(self): + # GH42430 1D slices over extension types turn into N-dimensional slices over + # ExtensionArrays + class CapturingStringArray(pd.arrays.StringArray): + """Extend StringArray to capture arguments to __getitem__""" + + def __getitem__(self, item): + self.last_item_arg = item + return super().__getitem__(item) + + df = pd.DataFrame( + {"col1": CapturingStringArray(np.array(["hello", "world"], dtype=object))} + ) + _ = df.iloc[:1] + + # String comparison because there's no native way to compare slices. + # Before the fix for GH42430, last_item_arg would get set to the 2D slice + # (Ellipsis, slice(None, 1, None)) + self.assert_equal(str(df["col1"].array.last_item_arg), "slice(None, 1, None)")