diff --git a/pandas/_libs/arrays.pxd b/pandas/_libs/arrays.pxd new file mode 100644 index 0000000000000..737da29da46a4 --- /dev/null +++ b/pandas/_libs/arrays.pxd @@ -0,0 +1,11 @@ + +from numpy cimport ndarray + + +cdef class NDArrayBacked: + cdef: + readonly ndarray _ndarray + readonly object _dtype + + cpdef NDArrayBacked _from_backing_data(self, ndarray values) + cpdef __setstate__(self, state) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 1f4a47c4e252a..a2d4cf3000ee1 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -45,9 +45,9 @@ cdef class NDArrayBacked: # TODO: implement take in terms of cnp.PyArray_TakeFrom # TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate - cdef: - readonly ndarray _ndarray - readonly object _dtype + # NOTE: the _ndarray and _dtype attributes are declared in arrays.pxd so + # that other Cython modules (e.g. internals.pyx) can cimport NDArrayBacked; + # they are intentionally not redeclared here. def __init__(self, ndarray values, object dtype): self._ndarray = values diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi index f3436e9c7afba..74ca311b35ed7 100644 --- a/pandas/_libs/internals.pyi +++ b/pandas/_libs/internals.pyi @@ -12,6 +12,7 @@ from pandas._typing import ( ) from pandas import Index +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.internals.blocks import Block as B def slice_len(slc: slice, objlen: int = ...) -> int: ... @@ -67,6 +68,10 @@ class NumpyBlock(SharedBlock): values: np.ndarray def getitem_block_index(self: T, slicer: slice) -> T: ... +class NDArrayBackedBlock(SharedBlock): + values: NDArrayBackedExtensionArray + def getitem_block_index(self: T, slicer: slice) -> T: ... + class Block(SharedBlock): ... 
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 77bb462c6df4a..6c1ca3deba047 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -23,6 +23,7 @@ cnp.import_array() from pandas._libs.algos import ensure_int64 +from pandas._libs.arrays cimport NDArrayBacked from pandas._libs.util cimport is_integer_object @@ -527,6 +528,29 @@ cdef class NumpyBlock(SharedBlock): return type(self)(new_values, self._mgr_locs, ndim=self.ndim) +cdef class NDArrayBackedBlock(SharedBlock): + """ + Block backed by NDArrayBackedExtensionArray + """ + cdef public: + NDArrayBacked values + + def __cinit__(self, NDArrayBacked values, BlockPlacement placement, int ndim): + # set values here; the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values + + # @final # not useful in cython, but we _would_ annotate with @final + cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer): + """ + Perform a __getitem__-like operation specialized to slicing along index. 
+ + Assumes self.ndim == 2 + """ + new_values = self.values[..., slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + + cdef class Block(SharedBlock): cdef: public object values diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 693b1832ed3c9..ad25eb6fbcaa8 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -51,6 +51,7 @@ from pandas.core.construction import ( array as pd_array, create_series_with_explicit_dtype, + ensure_wrapped_if_datetimelike, ) if TYPE_CHECKING: @@ -908,6 +909,7 @@ def apply_broadcast(self, target: DataFrame) -> DataFrame: @property def series_generator(self): values = self.values + values = ensure_wrapped_if_datetimelike(values) assert len(values) > 0 # We create one Series object, and will swap out the data inside diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 60d79718cd85f..52b5dd9b7b5b4 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -49,7 +49,6 @@ is_categorical_dtype, is_complex_dtype, is_datetime64_any_dtype, - is_extension_array_dtype, is_integer_dtype, is_numeric_dtype, is_sparse, @@ -978,7 +977,7 @@ def agg_series(self, obj: Series, func: F) -> tuple[ArrayLike, np.ndarray]: # SeriesGrouper would raise if we were to call _aggregate_series_fast result, counts = self._aggregate_series_pure_python(obj, func) - elif is_extension_array_dtype(obj.dtype): + elif not isinstance(obj._values, np.ndarray): # _aggregate_series_fast would raise TypeError when # calling libreduction.Slider # In the datetime64tz case it would incorrectly cast to tz-naive diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d87e77043a713..92f9d803d1ebe 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -263,7 +263,7 @@ def make_block_same_class( if placement is None: placement = self._mgr_locs - if values.dtype.kind == "m": + if values.dtype.kind in ["m", "M"]: # TODO: remove this once fastparquet has 
stopped relying on it values = ensure_wrapped_if_datetimelike(values) @@ -1663,12 +1663,13 @@ class NumericBlock(NumpyBlock): is_numeric = True -class NDArrayBackedExtensionBlock(libinternals.Block, EABackedBlock): +class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock): """ Block backed by an NDArrayBackedExtensionArray """ values: NDArrayBackedExtensionArray + getitem_block_index = libinternals.NDArrayBackedBlock.getitem_block_index @property def is_view(self) -> bool: