Skip to content

Commit 1fabe15

Browse files
committed
REF: implement EA.pad_or_backfill
1 parent b4b35af commit 1fabe15

File tree

16 files changed

+284
-22
lines changed

16 files changed

+284
-22
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ Deprecations
306306
- Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`)
307307
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
308308
- Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`)
309+
- Deprecated the "method" and "limit" keywords in :meth:`ExtensionArray.fillna`, implement and use :meth:`ExtensionArray.pad_or_backfill` instead (:issue:`53621`)
309310
- Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
310311
- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
311312
- Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)

pandas/core/arrays/_mixins.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
AxisInt,
2020
Dtype,
2121
F,
22+
FillnaOptions,
2223
PositionalIndexer2D,
2324
PositionalIndexerTuple,
2425
ScalarIndexer,
@@ -294,6 +295,37 @@ def _fill_mask_inplace(
294295
func = missing.get_fill_func(method, ndim=self.ndim)
295296
func(self._ndarray.T, limit=limit, mask=mask.T)
296297

298+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Forward- or backward-fill the missing entries of the backing ndarray.

    Parameters
    ----------
    method : {'backfill', 'bfill', 'pad', 'ffill'}
        Fill direction; resolved through ``missing.get_fill_func``.
    limit : int, default None
        Forwarded unchanged to the fill function.
    limit_area : {'inside', 'outside'}, default None
        Accepted for signature compatibility; not used by this
        implementation.
    copy : bool, default True
        If True, the fill is applied to a copy of the backing data and a
        new array is returned. If False, the fill function is applied
        directly to ``self._ndarray.T`` and ``self`` is returned.

    Returns
    -------
    Self
    """
    na_mask = self.isna()

    if not na_mask.any():
        # Nothing to fill; only the copy semantics matter.
        return self.copy() if copy else self

    # (for now) when self.ndim == 2, we assume axis=0
    filler = missing.get_fill_func(method, ndim=self.ndim)

    transposed = self._ndarray.T.copy() if copy else self._ndarray.T
    filler(transposed, limit=limit, mask=na_mask.T)

    if not copy:
        # The fill function was handed our own (transposed) backing data.
        return self
    return self._from_backing_data(transposed.T)
328+
297329
@doc(ExtensionArray.fillna)
298330
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
299331
value, method = validate_fillna_kwargs(
@@ -309,7 +341,6 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
309341

310342
if mask.any():
311343
if method is not None:
312-
# TODO: check value is None
313344
# (for now) when self.ndim == 2, we assume axis=0
314345
func = missing.get_fill_func(method, ndim=self.ndim)
315346
npvalues = self._ndarray.T.copy()

pandas/core/arrays/arrow/array.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,24 @@ def dropna(self) -> Self:
899899
"""
900900
return type(self)(pc.drop_null(self._pa_array))
901901

902+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill, short-circuiting when the array holds no missing values.

    When ``self._hasna`` is false a copy of ``self`` is returned regardless
    of ``copy``; otherwise all keywords are forwarded to the parent class
    implementation.
    """
    if self._hasna:
        # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can
        #  remove this method entirely.
        return super().pad_or_backfill(
            method=method, limit=limit, limit_area=limit_area, copy=copy
        )

    # TODO(CoW): Not necessary anymore when CoW is the default
    return self.copy()
919+
902920
@doc(ExtensionArray.fillna)
903921
def fillna(
904922
self,

pandas/core/arrays/base.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
cast,
2121
overload,
2222
)
23+
import warnings
2324

2425
import numpy as np
2526

@@ -35,6 +36,7 @@
3536
Substitution,
3637
cache_readonly,
3738
)
39+
from pandas.util._exceptions import find_stack_level
3840
from pandas.util._validators import (
3941
validate_bool_kwarg,
4042
validate_fillna_kwargs,
@@ -127,6 +129,7 @@ class ExtensionArray:
127129
interpolate
128130
isin
129131
isna
132+
pad_or_backfill
130133
ravel
131134
repeat
132135
searchsorted
@@ -177,6 +180,7 @@ class ExtensionArray:
177180
methods:
178181
179182
* fillna
183+
* pad_or_backfill
180184
* dropna
181185
* unique
182186
* factorize / _values_for_factorize
@@ -782,6 +786,82 @@ def interpolate(
782786
f"{type(self).__name__} does not implement interpolate"
783787
)
784788

789+
def pad_or_backfill(
790+
self,
791+
*,
792+
method: FillnaOptions,
793+
limit: int | None = None,
794+
limit_area: Literal["inside", "outside"] | None = None,
795+
copy: bool = True,
796+
) -> Self:
797+
"""
798+
pad or backfill values, used by Series/DataFrame ffill and bfill.
799+
800+
Parameters
801+
----------
802+
method : {'backfill', 'bfill', 'pad', 'ffill'}
803+
Method to use for filling holes in reindexed Series:
804+
805+
* pad / ffill: propagate last valid observation forward to next valid.
806+
* backfill / bfill: use NEXT valid observation to fill gap.
807+
808+
limit : int, default None
809+
This is the maximum number of consecutive
810+
NaN values to forward/backward fill. In other words, if there is
811+
a gap with more than this number of consecutive NaNs, it will only
812+
be partially filled. If method is not specified, this is the
813+
maximum number of entries along the entire axis where NaNs will be
814+
filled.
815+
816+
copy : bool, default True
817+
Whether to make a copy of the data before filling. If False, then
818+
the original should be modified and no new memory should be allocated.
819+
For ExtensionArray subclasses that cannot do this, it is at the
820+
author's discretion whether to ignore "copy=False" or to raise.
821+
The base class implementation ignores the keyword if any NAs are
822+
present.
823+
824+
"""
825+
826+
# If a 3rd-party EA has implemented this functionality in fillna,
827+
# we warn that they need to implement pad_or_backfill instead.
828+
if (
829+
type(self).fillna is not ExtensionArray.fillna
830+
and type(self).pad_or_backfill is ExtensionArray.pad_or_backfill
831+
):
832+
# Check for pad_or_backfill here allows us to call
833+
# super().pad_or_backfill without getting this warning
834+
warnings.warn(
835+
"ExtensionArray.fillna 'method' keyword is deprecated. "
836+
"In a future version. arr.pad_or_backfill will be called "
837+
"instead. 3rd-party ExtensionArray authors need to implement "
838+
"pad_or_backfill.",
839+
FutureWarning,
840+
stacklevel=find_stack_level(),
841+
)
842+
return self.fillna(method=method, limit=limit)
843+
844+
mask = self.isna()
845+
846+
if mask.any():
847+
# NB: the base class does not respect the "copy" keyword
848+
meth = missing.clean_fill_method(method)
849+
850+
npmask = np.asarray(mask)
851+
if meth == "pad":
852+
indexer = libalgos.get_fill_indexer(npmask, limit=limit)
853+
return self.take(indexer, allow_fill=True)
854+
else:
855+
# i.e. meth == "backfill"
856+
indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
857+
return self[::-1].take(indexer, allow_fill=True)
858+
859+
else:
860+
if not copy:
861+
return self
862+
new_values = self.copy()
863+
return new_values
864+
785865
def fillna(
786866
self,
787867
value: object | ArrayLike | None = None,
@@ -803,6 +883,8 @@ def fillna(
803883
* pad / ffill: propagate last valid observation forward to next valid.
804884
* backfill / bfill: use NEXT valid observation to fill gap.
805885
886+
.. deprecated:: 2.1.0
887+
806888
limit : int, default None
807889
If method is specified, this is the maximum number of consecutive
808890
NaN values to forward/backward fill. In other words, if there is
@@ -811,11 +893,22 @@ def fillna(
811893
maximum number of entries along the entire axis where NaNs will be
812894
filled.
813895
896+
.. deprecated:: 2.1.0
897+
814898
Returns
815899
-------
816900
ExtensionArray
817901
With NA/NaN filled.
818902
"""
903+
if method is not None:
904+
warnings.warn(
905+
f"The 'method' keyword in {type(self).__name__}.fillna is "
906+
"deprecated and will be removed in a future version. "
907+
"Use pad_or_backfill instead.",
908+
FutureWarning,
909+
stacklevel=find_stack_level(),
910+
)
911+
819912
value, method = validate_fillna_kwargs(value, method)
820913

821914
mask = self.isna()

pandas/core/arrays/interval.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
ArrayLike,
3232
AxisInt,
3333
Dtype,
34+
FillnaOptions,
3435
IntervalClosedType,
3536
NpDtype,
3637
PositionalIndexer,
@@ -886,6 +887,16 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
886887
indexer = obj.argsort()[-1]
887888
return obj[indexer]
888889

890+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Reject method-based filling.

    Raises
    ------
    TypeError
        Always; none of the arguments are inspected.
    """
    msg = "Filling by method is not supported for IntervalArray."
    raise TypeError(msg)
899+
889900
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
890901
"""
891902
Fill NA/NaN values using the specified method.

pandas/core/arrays/masked.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
AstypeArg,
2626
AxisInt,
2727
DtypeObj,
28+
FillnaOptions,
2829
NpDtype,
2930
PositionalIndexer,
3031
Scalar,
@@ -178,6 +179,36 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
178179

179180
return self._simple_new(self._data[item], newmask)
180181

182+
def pad_or_backfill(
183+
self,
184+
*,
185+
method: FillnaOptions,
186+
limit: int | None = None,
187+
limit_area: Literal["inside", "outside"] | None = None,
188+
copy: bool = True,
189+
) -> Self:
190+
mask = self._mask
191+
192+
if mask.any():
193+
func = missing.get_fill_func(method, ndim=self.ndim)
194+
195+
npvalues = self._data.T
196+
new_mask = mask.T
197+
if copy:
198+
npvalues = npvalues.copy()
199+
new_mask = new_mask.copy()
200+
func(npvalues, limit=limit, mask=new_mask)
201+
if copy:
202+
return self._simple_new(npvalues.T, new_mask.T)
203+
else:
204+
return self
205+
else:
206+
if copy:
207+
new_values = self.copy()
208+
else:
209+
new_values = self
210+
return new_values
211+
181212
@doc(ExtensionArray.fillna)
182213
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
183214
value, method = validate_fillna_kwargs(value, method)

pandas/core/arrays/numpy_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ def pad_or_backfill(
233233
self,
234234
*,
235235
method: FillnaOptions,
236-
limit: int | None,
236+
limit: int | None = None,
237237
limit_area: Literal["inside", "outside"] | None = None,
238238
copy: bool = True,
239239
) -> Self:

pandas/core/arrays/period.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
from pandas._typing import (
7878
AnyArrayLike,
7979
Dtype,
80+
FillnaOptions,
8081
NpDtype,
8182
NumpySorter,
8283
NumpyValueArrayLike,
@@ -789,6 +790,25 @@ def searchsorted(
789790
m8arr = self._ndarray.view("M8[ns]")
790791
return m8arr.searchsorted(npvalue, side=side, sorter=sorter)
791792

793+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill by delegating to the datetime64 view of this array.

    All keywords are forwarded to the ``pad_or_backfill`` of
    ``self.view("M8[ns]")``. With ``copy=True`` the result is viewed back
    as ``self.dtype``; with ``copy=False`` the delegate's return value is
    discarded and ``self`` is returned.
    """
    # view as dt64 so we get treated as timelike in core.missing,
    #  similar to dtl._period_dispatch
    filled = self.view("M8[ns]").pad_or_backfill(
        method=method, limit=limit, limit_area=limit_area, copy=copy
    )
    if not copy:
        # NOTE(review): relies on the view sharing our data so that the
        #  in-place fill is visible through ``self`` -- confirm.
        return self
    return cast("Self", filled.view(self.dtype))
811+
792812
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
793813
if method is not None:
794814
# view as dt64 so we get treated as timelike in core.missing,

pandas/core/arrays/sparse/array.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,30 @@ def isna(self):
713713
mask[self.sp_index.indices] = isna(self.sp_values)
714714
return type(self)(mask, fill_value=False, dtype=dtype)
715715

716+
def pad_or_backfill(
717+
self,
718+
*,
719+
method: FillnaOptions,
720+
limit: int | None = None,
721+
limit_area: Literal["inside", "outside"] | None = None,
722+
copy: bool = True,
723+
) -> Self:
724+
msg = "pad_or_backfill with 'method' requires high memory usage."
725+
warnings.warn(
726+
msg,
727+
PerformanceWarning,
728+
stacklevel=find_stack_level(),
729+
)
730+
new_values = np.asarray(self)
731+
# pad_or_backfill_inplace modifies new_values inplace
732+
# error: Argument "method" to "pad_or_backfill_inplace" has incompatible
733+
# type "Literal['backfill', 'bfill', 'ffill', 'pad']"; expected
734+
# "Literal['pad', 'backfill']"
735+
pad_or_backfill_inplace(
736+
new_values, method=method, limit=limit # type: ignore[arg-type]
737+
)
738+
return type(self)(new_values, fill_value=self.fill_value)
739+
716740
def fillna(
717741
self,
718742
value=None,

pandas/core/internals/blocks.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1903,9 +1903,9 @@ def pad_or_backfill(
19031903
values = self.values
19041904
if values.ndim == 2 and axis == 1:
19051905
# NDArrayBackedExtensionArray.pad_or_backfill assumes axis=0
1906-
new_values = values.T.fillna(method=method, limit=limit).T
1906+
new_values = values.T.pad_or_backfill(method=method, limit=limit).T
19071907
else:
1908-
new_values = values.fillna(method=method, limit=limit)
1908+
new_values = values.pad_or_backfill(method=method, limit=limit)
19091909
return [self.make_block_same_class(new_values)]
19101910

19111911

pandas/tests/arrays/test_datetimelike.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ def test_fillna_method_doesnt_change_orig(self, method):
251251

252252
fill_value = arr[3] if method == "pad" else arr[5]
253253

254-
result = arr.fillna(method=method)
254+
result = arr.pad_or_backfill(method=method)
255255
assert result[4] == fill_value
256256

257257
# check that the original was not changed

0 commit comments

Comments
 (0)