diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 9453edb0c88d8..c74b2b44cf346 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -241,6 +241,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.api.extensions.ExtensionArray.factorize \ pandas.api.extensions.ExtensionArray.fillna \ pandas.api.extensions.ExtensionArray.insert \ + pandas.api.extensions.ExtensionArray.interpolate \ pandas.api.extensions.ExtensionArray.isin \ pandas.api.extensions.ExtensionArray.isna \ pandas.api.extensions.ExtensionArray.ravel \ diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index b33efd388bd60..63eacc3f6d1d9 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -51,6 +51,7 @@ objects. api.extensions.ExtensionArray.factorize api.extensions.ExtensionArray.fillna api.extensions.ExtensionArray.insert + api.extensions.ExtensionArray.interpolate api.extensions.ExtensionArray.isin api.extensions.ExtensionArray.isna api.extensions.ExtensionArray.ravel diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 517baa648d805..50e8bb0087d73 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -111,6 +111,7 @@ Other enhancements - :meth:`Series.str.join` now supports ``ArrowDtype(pa.string())`` (:issue:`53646`) - :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`) - :meth:`SeriesGroupby.transform` and :meth:`DataFrameGroupby.transform` now support passing in a string as the function for ``engine="numba"`` (:issue:`53579`) +- Added :meth:`ExtensionArray.interpolate` used by :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`53659`) - Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`) - Added a new parameter ``by_row`` to :meth:`Series.apply`. When set to ``False`` the supplied callables will always operate on the whole Series (:issue:`53400`). - Groupby aggregations (such as :meth:`DataFrameGroupby.sum`) now can preserve the dtype of the input instead of casting to ``float64`` (:issue:`44952`) @@ -118,6 +119,7 @@ Other enhancements - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) +- .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: diff --git a/pandas/_typing.py b/pandas/_typing.py index 9d4acbe76ba15..ffe9e6b319dfd 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -307,6 +307,26 @@ def closed(self) -> bool: # Arguments for fillna() FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"] +InterpolateOptions = Literal[ + "linear", + "time", + "index", + "values", + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "barycentric", + "polynomial", + "krogh", + "piecewise_polynomial", + "spline", + "pchip", + "akima", + "cubicspline", + "from_derivatives", +] # internals Manager = Union[ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 78ecd93d5cc75..ceac8e22426d9 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -78,6 +78,7 @@ AxisInt, Dtype, FillnaOptions, + InterpolateOptions, NumpySorter, NumpyValueArrayLike, PositionalIndexer, @@ -90,6 +91,8 @@ npt, ) + from pandas import Index + _extension_array_shared_docs: dict[str, str] = {} @@ -118,6 +121,7 @@ class ExtensionArray: fillna equals insert + interpolate isin isna ravel @@ -155,6 +159,7 @@ class ExtensionArray: * take * copy * _concat_same_type + * interpolate A default repr displaying the type, (truncated) data, length, and dtype is provided. It can be customized or replaced by @@ -753,6 +758,27 @@ def argmax(self, skipna: bool = True) -> int: raise NotImplementedError return nargminmax(self, "argmax") + def interpolate( + self, + *, + method: InterpolateOptions, + axis: int, + index: Index, + limit, + limit_direction, + limit_area, + fill_value, + copy: bool, + **kwargs, + ) -> Self: + """ + See DataFrame.interpolate.__doc__. + """ + # NB: we return type(self) even if copy=False + raise NotImplementedError( + f"{type(self).__name__} does not implement interpolate" + ) + def fillna( self, value: object | ArrayLike | None = None, diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f13a5e8b560a4..bef8b55b876a2 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -58,6 +58,7 @@ Dtype, DtypeObj, F, + InterpolateOptions, NpDtype, PositionalIndexer2D, PositionalIndexerTuple, @@ -2233,23 +2234,23 @@ def copy(self, order: str = "C") -> Self: def interpolate( self, *, - method, + method: InterpolateOptions, axis: int, index: Index, limit, limit_direction, limit_area, - inplace: bool, + copy: bool, **kwargs, ) -> Self: """ See NDFrame.interpolate.__doc__. """ - # NB: we return type(self) even if inplace=True + # NB: we return type(self) even if copy=False if method != "linear": raise NotImplementedError - if inplace: + if not copy: out_data = self._ndarray else: out_data = self._ndarray.copy() @@ -2264,7 +2265,7 @@ def interpolate( limit_area=limit_area, **kwargs, ) - if inplace: + if not copy: return self return type(self)._simple_new(out_data, dtype=self.dtype) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 8b6e49ccf2d41..990b55cb40366 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -36,6 +36,7 @@ AxisInt, Dtype, FillnaOptions, + InterpolateOptions, NpDtype, Scalar, Self, @@ -261,20 +262,20 @@ def pad_or_backfill( def interpolate( self, *, - method, + method: InterpolateOptions, axis: int, index: Index, limit, limit_direction, limit_area, - inplace: bool, + copy: bool, **kwargs, ) -> Self: """ See NDFrame.interpolate.__doc__. """ - # NB: we return type(self) even if inplace=True - if inplace: + # NB: we return type(self) even if copy=False + if not copy: out_data = self._ndarray else: out_data = self._ndarray.copy() @@ -290,7 +291,7 @@ def interpolate( limit_area=limit_area, **kwargs, ) - if inplace: + if not copy: return self return type(self)._simple_new(out_data, dtype=self.dtype) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a4b4466e8d609..b9cf5fca30b2f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -62,6 +62,7 @@ IgnoreRaise, IndexKeyFunc, IndexLabel, + InterpolateOptions, IntervalClosedType, JSONSerializable, Level, @@ -7753,27 +7754,7 @@ def replace( @final def interpolate( self, - method: Literal[ - "linear", - "time", - "index", - "values", - "pad", - "nearest", - "zero", - "slinear", - "quadratic", - "cubic", - "barycentric", - "polynomial", - "krogh", - "piecewise_polynomial", - "spline", - "pchip", - "akima", - "cubicspline", - "from_derivatives", - ] = "linear", + method: InterpolateOptions = "linear", *, axis: Axis = 0, limit: int | None = None, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 832e1560ae55e..eec94a2d61c26 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -35,6 +35,7 @@ F, FillnaOptions, IgnoreRaise, + InterpolateOptions, QuantileInterpolation, Self, Shape, @@ -600,13 +601,13 @@ def _get_values_and_refs(self, using_cow, inplace): @final def _get_refs_and_copy(self, using_cow: bool, inplace: bool): refs = None - arr_inplace = inplace + copy = not inplace if inplace: if using_cow and self.refs.has_reference(): - arr_inplace = False + copy = True else: refs = self.refs - return arr_inplace, refs + return copy, refs # --------------------------------------------------------------------- # Replace @@ -1383,7 +1384,7 @@ def pad_or_backfill( def interpolate( self, *, - method: FillnaOptions = "pad", + method: FillnaOptions | InterpolateOptions = "pad", axis: AxisInt = 0, index: Index | None = None, inplace: bool = False, @@ -1402,17 +1403,8 @@ def interpolate( return [self.copy(deep=False)] return [self] if inplace else [self.copy()] - try: - m = missing.clean_fill_method(method) - except ValueError: - m = None - # error: Non-overlapping equality check (left operand type: - # "Literal['backfill', 'bfill', 'ffill', 'pad']", right - # operand type: "Literal['asfreq']") - if method == "asfreq": # type: ignore[comparison-overlap] - # clean_fill_method used to allow this - raise - if m is None and self.dtype == _dtype_obj: + # TODO(3.0): this case will not be reachable once GH#53638 is enforced + if not _interp_method_is_pad_or_backfill(method) and self.dtype == _dtype_obj: # only deal with floats # bc we already checked that can_hold_na, we don't have int dtype here # test_interp_basic checks that we make a copy here @@ -1435,32 +1427,32 @@ def interpolate( **kwargs, ) - arr_inplace, refs = self._get_refs_and_copy(using_cow, inplace) + copy, refs = self._get_refs_and_copy(using_cow, inplace) # Dispatch to the PandasArray method. # We know self.array_values is a PandasArray bc EABlock overrides - if m is not None: + if _interp_method_is_pad_or_backfill(method): # TODO: warn about ignored kwargs, limit_direction, index...? new_values = cast(PandasArray, self.array_values).pad_or_backfill( - method=method, + method=cast(FillnaOptions, method), axis=axis, limit=limit, limit_area=limit_area, - copy=not arr_inplace, + copy=copy, ) else: assert index is not None # for mypy new_values = cast(PandasArray, self.array_values).interpolate( - method=method, + method=cast(InterpolateOptions, method), axis=axis, index=index, limit=limit, limit_direction=limit_direction, limit_area=limit_area, - inplace=arr_inplace, + copy=copy, **kwargs, ) - data = new_values._ndarray + data = extract_array(new_values, extract_numpy=True) nb = self.make_block_same_class(data, refs=refs) return nb._maybe_downcast([nb], downcast, using_cow) @@ -1889,7 +1881,8 @@ def values_for_json(self) -> np.ndarray: def interpolate( self, *, - method: FillnaOptions = "pad", + method: FillnaOptions | InterpolateOptions = "pad", + index: Index | None = None, axis: int = 0, inplace: bool = False, limit: int | None = None, @@ -1898,11 +1891,28 @@ def interpolate( **kwargs, ): values = self.values - if values.ndim == 2 and axis == 0: - # NDArrayBackedExtensionArray.fillna assumes axis=1 - new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T + + if not _interp_method_is_pad_or_backfill(method): + imeth = cast(InterpolateOptions, method) + return super().interpolate( + method=imeth, + index=index, + axis=axis, + inplace=inplace, + limit=limit, + fill_value=fill_value, + using_cow=using_cow, + **kwargs, + ) else: - new_values = values.fillna(value=fill_value, method=method, limit=limit) + meth = cast(FillnaOptions, method) + if values.ndim == 2 and axis == 0: + # NDArrayBackedExtensionArray.fillna assumes axis=1 + new_values = values.T.fillna( + value=fill_value, method=meth, limit=limit + ).T + else: + new_values = values.fillna(value=fill_value, method=meth, limit=limit) return self.make_block_same_class(new_values) @@ -2245,45 +2255,6 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock): def values_for_json(self) -> np.ndarray: return self.values._ndarray - def interpolate( - self, - *, - method: FillnaOptions = "pad", - index: Index | None = None, - axis: int = 0, - inplace: bool = False, - limit: int | None = None, - fill_value=None, - using_cow: bool = False, - **kwargs, - ): - values = self.values - - # error: Non-overlapping equality check (left operand type: - # "Literal['backfill', 'bfill', 'ffill', 'pad']", right operand type: - # "Literal['linear']") [comparison-overlap] - if method == "linear": # type: ignore[comparison-overlap] - # TODO: GH#50950 implement for arbitrary EAs - arr_inplace, refs = self._get_refs_and_copy(using_cow, inplace) - - new_values = self.values.interpolate( - method=method, - index=index, - axis=axis, - inplace=arr_inplace, - limit=limit, - fill_value=fill_value, - **kwargs, - ) - return self.make_block_same_class(new_values, refs=refs) - - elif values.ndim == 2 and axis == 0: - # NDArrayBackedExtensionArray.fillna assumes axis=1 - new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T - else: - new_values = values.fillna(value=fill_value, method=method, limit=limit) - return self.make_block_same_class(new_values) - class DatetimeTZBlock(DatetimeLikeBlock): """implement a datetime64 block with a tz attribute""" @@ -2597,3 +2568,14 @@ def external_values(values: ArrayLike) -> ArrayLike: # TODO(CoW) we should also mark our ExtensionArrays as read-only return values + + +def _interp_method_is_pad_or_backfill(method: str) -> bool: + try: + m = missing.clean_fill_method(method) + except ValueError: + m = None + if method == "asfreq": + # clean_fill_method used to allow this + raise + return m is not None diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 9566a2f113b36..ef6a91b811c4a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -89,7 +89,7 @@ AxisInt, Frequency, IndexLabel, - QuantileInterpolation, + InterpolateOptions, T, TimedeltaConvertibleTypes, TimeGrouperOrigin, @@ -837,7 +837,7 @@ def fillna(self, method, limit: int | None = None): def interpolate( self, - method: QuantileInterpolation = "linear", + method: InterpolateOptions = "linear", *, axis: Axis = 0, limit: int | None = None,