From d581753dfe056f71e796c1cb7db72c49c9b023c8 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 24 Sep 2020 20:19:09 -0700 Subject: [PATCH 1/3] CLN: simplify interpolate_2d and callers --- pandas/core/arrays/categorical.py | 2 +- pandas/core/internals/blocks.py | 11 ++++------- pandas/core/missing.py | 13 ++----------- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d2f88b353e1c1..4e83284cb96ed 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1643,7 +1643,7 @@ def fillna(self, value=None, method=None, limit=None): # TODO: dispatch when self.categories is EA-dtype values = np.asarray(self).reshape(-1, len(self)) - values = interpolate_2d(values, method, 0, None, value).astype( + values = interpolate_2d(values, method, 0, None).astype( self.categories.dtype )[0] codes = _get_codes_for_values(values, self.categories) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f18bc4d0bcf85..278d71068b7bf 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1181,12 +1181,15 @@ def interpolate( m = None if m is not None: + if fill_value is not None: + # similar to validate_fillna_kwargs + raise ValueError("Cannot pass both fill_value and method") + return self._interpolate_with_fill( method=m, axis=axis, inplace=inplace, limit=limit, - fill_value=fill_value, coerce=coerce, downcast=downcast, ) @@ -1214,7 +1217,6 @@ def _interpolate_with_fill( axis: int = 0, inplace: bool = False, limit: Optional[int] = None, - fill_value: Optional[Any] = None, coerce: bool = False, downcast: Optional[str] = None, ) -> List["Block"]: @@ -1232,16 +1234,11 @@ def _interpolate_with_fill( values = self.values if inplace else self.values.copy() - # We only get here for non-ExtensionBlock - fill_value = convert_scalar_for_putitemlike(fill_value, self.values.dtype) - values = missing.interpolate_2d( values, method=method, axis=axis, limit=limit, - fill_value=fill_value, - dtype=self.dtype, ) blocks = [self.make_block_same_class(values, ndim=self.ndim)] diff --git a/pandas/core/missing.py b/pandas/core/missing.py index edcdf2f54bc4c..f4182027e9e04 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -545,8 +545,6 @@ def interpolate_2d( method="pad", axis=0, limit=None, - fill_value=None, - dtype: Optional[DtypeObj] = None, ): """ Perform an actual interpolation of values, values will be make 2-d if @@ -563,18 +561,11 @@ def interpolate_2d( raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") values = values.reshape(tuple((1,) + values.shape)) - if fill_value is None: - mask = None - else: # todo create faster fill func without masking - mask = mask_missing(transf(values), fill_value) - method = clean_fill_method(method) if method == "pad": - values = transf(pad_2d(transf(values), limit=limit, mask=mask, dtype=dtype)) + values = transf(pad_2d(transf(values), limit=limit)) else: - values = transf( - backfill_2d(transf(values), limit=limit, mask=mask, dtype=dtype) - ) + values = transf(backfill_2d(transf(values), limit=limit)) # reshape back if ndim == 1: From 437be126ec0a73fb084335259e98d8cffae425f3 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Sep 2020 11:22:18 -0700 Subject: [PATCH 2/3] add test --- pandas/tests/series/methods/test_interpolate.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index cba9443005f2f..9fc468221ee2d 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -340,6 +340,14 @@ def test_interp_invalid_method(self, invalid_method): with pytest.raises(ValueError, match=msg): s.interpolate(method=invalid_method, limit=-1) + def test_interp_invalid_method_and_value(self): + # GH#36624 + ser = Series([1, 3, np.nan, 12, np.nan, 25]) + + msg = "Cannot pass both fill_value and method" + with pytest.raises(ValueError, match=msg): + ser.interpolate(fill_value=3, method="pad") + def test_interp_limit_forward(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) From 5a9193a0992b72124239e5f393761ef640a5e72d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Sep 2020 15:08:48 -0700 Subject: [PATCH 3/3] CLN: privatize in core.missing --- pandas/core/arrays/base.py | 4 ++-- pandas/core/missing.py | 37 +++++++++++++++++++------------------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index eae401f9744f0..c2fc72ff753a8 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -32,7 +32,7 @@ from pandas.core import ops from pandas.core.algorithms import factorize_array, unique -from pandas.core.missing import backfill_1d, pad_1d +from pandas.core.missing import get_fill_func from pandas.core.sorting import nargminmax, nargsort _extension_array_shared_docs: Dict[str, str] = dict() @@ -616,7 +616,7 @@ def fillna(self, value=None, method=None, limit=None): if mask.any(): if method is not None: - func = pad_1d if method == "pad" else backfill_1d + func = get_fill_func(method) new_values = func(self.astype(object), limit=limit, mask=mask) new_values = self._from_sequence(new_values, dtype=self.dtype) else: diff --git a/pandas/core/missing.py b/pandas/core/missing.py index f4182027e9e04..fe9876a8252b3 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -562,20 +562,22 @@ def interpolate_2d( values = values.reshape(tuple((1,) + values.shape)) method = clean_fill_method(method) + tvalues = transf(values) if method == "pad": - values = transf(pad_2d(transf(values), limit=limit)) + result = _pad_2d(tvalues, limit=limit) else: - values = transf(backfill_2d(transf(values), limit=limit)) + result = _backfill_2d(tvalues, limit=limit) + result = transf(result) # reshape back if ndim == 1: - values = values[0] + result = result[0] if orig_values.dtype.kind == "M": # convert float back to datetime64 - values = values.astype(orig_values.dtype) + result = result.astype(orig_values.dtype) - return values + return result def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool): @@ -597,10 +599,9 @@ def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool): return values -def _fillna_prep(values, mask=None, dtype: Optional[DtypeObj] = None): - # boilerplate for pad_1d, backfill_1d, pad_2d, backfill_2d - if dtype is None: - dtype = values.dtype +def _fillna_prep(values, mask=None): + # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d + dtype = values.dtype has_mask = mask is not None if not has_mask: @@ -613,20 +614,20 @@ def _fillna_prep(values, mask=None, dtype: Optional[DtypeObj] = None): return values, mask -def pad_1d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): - values, mask = _fillna_prep(values, mask, dtype) +def _pad_1d(values, limit=None, mask=None): + values, mask = _fillna_prep(values, mask) algos.pad_inplace(values, mask, limit=limit) return values -def backfill_1d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): - values, mask = _fillna_prep(values, mask, dtype) +def _backfill_1d(values, limit=None, mask=None): + values, mask = _fillna_prep(values, mask) algos.backfill_inplace(values, mask, limit=limit) return values -def pad_2d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): - values, mask = _fillna_prep(values, mask, dtype) +def _pad_2d(values, limit=None, mask=None): + values, mask = _fillna_prep(values, mask) if np.all(values.shape): algos.pad_2d_inplace(values, mask, limit=limit) @@ -636,8 +637,8 @@ def pad_2d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): return values -def backfill_2d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None): - values, mask = _fillna_prep(values, mask, dtype) +def _backfill_2d(values, limit=None, mask=None): + values, mask = _fillna_prep(values, mask) if np.all(values.shape): algos.backfill_2d_inplace(values, mask, limit=limit) @@ -647,7 +648,7 @@ def backfill_2d(values, limit=None, mask=None, dtype: Optional[DtypeObj] = None) return values -_fill_methods = {"pad": pad_1d, "backfill": backfill_1d} +_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} def get_fill_func(method):