From 1352371d9a86016d0adecdf565b710380b7c9a1f Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Sat, 1 Jul 2023 15:16:02 +0800 Subject: [PATCH 1/8] BUG: interpolate with fillna methods fail to fill across multiblocks --- pandas/core/generic.py | 13 ++++++++++--- pandas/tests/frame/methods/test_interpolate.py | 9 ++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b806ddbaa89ba..53f05e763d12e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7968,9 +7968,17 @@ def interpolate( axis = self._get_axis_number(axis) fillna_methods = ["ffill", "bfill", "pad", "backfill"] - should_transpose = axis == 1 and method not in fillna_methods - obj = self.T if should_transpose else self + if method not in fillna_methods: + should_transpose = axis == 1 + elif not self._mgr.is_single_block and axis == 1: + if inplace: + raise NotImplementedError() + should_transpose = True + else: + should_transpose = False + + obj, axis = (self.T, 1 - axis) if should_transpose else (self, axis) if obj.empty: if inplace: @@ -8024,7 +8032,6 @@ def interpolate( # TODO(3.0): remove this case # TODO: warn/raise on limit_direction or kwargs which are ignored? # as of 2023-06-26 no tests get here with either - new_data = obj._mgr.pad_or_backfill( method=method, axis=axis, diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index ac862e5673411..fcdf7db35446e 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -451,8 +451,11 @@ def test_interp_string_axis(self, axis_name, axis_number): expected = df.interpolate(method="linear", axis=axis_number) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("multiblock", [True, False]) @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"]) - def test_interp_fillna_methods(self, request, axis, method, using_array_manager): + def test_interp_fillna_methods( + self, request, axis, multiblock, method, using_array_manager + ): # GH 12918 if using_array_manager and axis in (1, "columns"): # TODO(ArrayManager) support axis=1 @@ -465,6 +468,10 @@ def test_interp_fillna_methods(self, request, axis, method, using_array_manager) "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], } ) + if multiblock: + df["D"] = np.nan + df["E"] = 1.0 + method2 = method if method != "pad" else "ffill" expected = getattr(df, method2)(axis=axis) msg = f"DataFrame.interpolate with method={method} is deprecated" From 2a7c4d2ee802e97db76520243ac6dad369ba62e1 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Sat, 1 Jul 2023 15:20:26 +0800 Subject: [PATCH 2/8] changelog added --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index ebbdbcb0f61f5..2c9699f509b6b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -434,6 +434,7 @@ Indexing Missing ^^^^^^^ +- Bug in :meth:`DataFrame.interpolate` failing to fill across multiblock data when ``method`` is "pad", "ffill", "bfill", or "backfill" (:issue:`53898`) - Bug in :meth:`DataFrame.interpolate` ignoring ``inplace`` when :class:`DataFrame` is empty (:issue:`53199`) - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` failing to raise on invalid ``downcast`` keyword, which can be only ``None`` or "infer" (:issue:`53103`) - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with complex dtype incorrectly failing to fill ``NaN`` entries (:issue:`53635`) From 94e9ae2728327261566927cf584c658559fa35f9 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Sun, 2 Jul 2023 00:48:47 +0800 Subject: [PATCH 3/8] resolved conversation --- pandas/core/generic.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 53f05e763d12e..b26672369ed26 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7969,18 +7969,7 @@ def interpolate( fillna_methods = ["ffill", "bfill", "pad", "backfill"] - if method not in fillna_methods: - should_transpose = axis == 1 - elif not self._mgr.is_single_block and axis == 1: - if inplace: - raise NotImplementedError() - should_transpose = True - else: - should_transpose = False - - obj, axis = (self.T, 1 - axis) if should_transpose else (self, axis) - - if obj.empty: + if self.empty: if inplace: return None return self.copy() @@ -7996,9 +7985,9 @@ def interpolate( FutureWarning, stacklevel=find_stack_level(), ) - elif np.any(obj.dtypes == object): + elif np.any(self.dtypes == object): # GH#53631 - if not (obj.ndim == 2 and np.all(obj.dtypes == object)): + if not (self.ndim == 2 and np.all(self.dtypes == object)): # don't warn in cases that already raise warnings.warn( f"{type(self).__name__}.interpolate with object dtype is " @@ -8008,14 +7997,14 @@ def interpolate( stacklevel=find_stack_level(), ) - if isinstance(obj.index, MultiIndex) and method != "linear": + if isinstance(self.index, MultiIndex) and method != "linear": raise ValueError( "Only `method=linear` interpolation is supported on MultiIndexes." ) limit_direction = missing.infer_limit_direction(limit_direction, method) - if obj.ndim == 2 and np.all(obj.dtypes == np.dtype("object")): + if self.ndim == 2 and np.all(self.dtypes == np.dtype("object")): raise TypeError( "Cannot interpolate with all object-dtype columns " "in the DataFrame. Try setting at least one " @@ -8032,6 +8021,13 @@ def interpolate( # TODO(3.0): remove this case # TODO: warn/raise on limit_direction or kwargs which are ignored? # as of 2023-06-26 no tests get here with either + if not self._mgr.is_single_block and axis == 1: + if inplace: + raise NotImplementedError() + obj, axis, should_transpose = self.T, 1 - axis, True + else: + obj, should_transpose = self, False + new_data = obj._mgr.pad_or_backfill( method=method, axis=axis, @@ -8041,6 +8037,7 @@ def interpolate( downcast=downcast, ) else: + obj, should_transpose = (self.T, True) if axis == 1 else (self, False) index = missing.get_interp_index(method, obj.index) axis = self._info_axis_number new_data = obj._mgr.interpolate( From 894b82b5a532ce768ffa0065ecc128ab0b8a1464 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Tue, 4 Jul 2023 15:04:23 +0800 Subject: [PATCH 4/8] reorganize code; resolve conversations --- pandas/core/generic.py | 60 ++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b26672369ed26..7cf9c763dc6b5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7963,20 +7963,11 @@ def interpolate( if downcast is not None and downcast != "infer": raise ValueError("downcast must be either None or 'infer'") - inplace = validate_bool_kwarg(inplace, "inplace") - - axis = self._get_axis_number(axis) - - fillna_methods = ["ffill", "bfill", "pad", "backfill"] - - if self.empty: - if inplace: - return None - return self.copy() - if not isinstance(method, str): raise ValueError("'method' should be a string, not None.") - elif method.lower() in fillna_methods: + + fillna_methods = ["ffill", "bfill", "pad", "backfill"] + if method.lower() in fillna_methods: # GH#53581 warnings.warn( f"{type(self).__name__}.interpolate with method={method} is " @@ -7985,25 +7976,26 @@ def interpolate( FutureWarning, stacklevel=find_stack_level(), ) - elif np.any(self.dtypes == object): - # GH#53631 - if not (self.ndim == 2 and np.all(self.dtypes == object)): - # don't warn in cases that already raise - warnings.warn( - f"{type(self).__name__}.interpolate with object dtype is " - "deprecated and will raise in a future version. Call " - "obj.infer_objects(copy=False) before interpolating instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) + + if "fill_value" in kwargs: + raise ValueError( + "'fill_value' is not a valid keyword for " + f"{type(self).__name__}.interpolate" + ) + + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + + if self.empty: + if inplace: + return None + return self.copy() if isinstance(self.index, MultiIndex) and method != "linear": raise ValueError( "Only `method=linear` interpolation is supported on MultiIndexes." ) - limit_direction = missing.infer_limit_direction(limit_direction, method) - if self.ndim == 2 and np.all(self.dtypes == np.dtype("object")): raise TypeError( "Cannot interpolate with all object-dtype columns " @@ -8011,16 +8003,12 @@ def interpolate( "column to a numeric dtype." ) - if "fill_value" in kwargs: - raise ValueError( - "'fill_value' is not a valid keyword for " - f"{type(self).__name__}.interpolate" - ) + limit_direction = missing.infer_limit_direction(limit_direction, method) if method.lower() in fillna_methods: # TODO(3.0): remove this case # TODO: warn/raise on limit_direction or kwargs which are ignored? - # as of 2023-06-26 no tests get here with either + # as of 2023-06-26 no tests get here with either if not self._mgr.is_single_block and axis == 1: if inplace: raise NotImplementedError() @@ -8038,8 +8026,18 @@ def interpolate( ) else: obj, should_transpose = (self.T, True) if axis == 1 else (self, False) + if np.any(obj.dtypes == object): + # GH#53631 + warnings.warn( + f"{type(self).__name__}.interpolate with object dtype is " + "deprecated and will raise in a future version. Call " + "obj.infer_objects(copy=False) before interpolating instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) index = missing.get_interp_index(method, obj.index) axis = self._info_axis_number + new_data = obj._mgr.interpolate( method=method, axis=axis, From 138534e7894f2c27b0efb14e8df12102e6886ca0 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Tue, 4 Jul 2023 18:27:40 +0800 Subject: [PATCH 5/8] retrigger checks From d79941411f02f5ab6b251688d42985429b2b09ac Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Sat, 8 Jul 2023 11:37:18 +0800 Subject: [PATCH 6/8] try to resolve conversation --- pandas/core/generic.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ad27bbc58d610..018d97c529cdb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7985,6 +7985,7 @@ def interpolate( fillna_methods = ["ffill", "bfill", "pad", "backfill"] if method.lower() in fillna_methods: # GH#53581 + # postpone setting obj, should_transpose warnings.warn( f"{type(self).__name__}.interpolate with method={method} is " "deprecated and will raise in a future version. " @@ -7992,6 +7993,19 @@ def interpolate( FutureWarning, stacklevel=find_stack_level(), ) + else: + obj, should_transpose = (self.T, True) if axis == 1 else (self, False) + if np.any(obj.dtypes == object): + # GH#53631 + if not (self.ndim == 2 and np.all(self.dtypes == object)): + # don't warn in cases that already raise + warnings.warn( + f"{type(self).__name__}.interpolate with object dtype is " + "deprecated and will raise in a future version. Call " + "obj.infer_objects(copy=False) before interpolating instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) if "fill_value" in kwargs: raise ValueError( @@ -8012,7 +8026,7 @@ def interpolate( "Only `method=linear` interpolation is supported on MultiIndexes." ) - if self.ndim == 2 and np.all(self.dtypes == np.dtype("object")): + if self.ndim == 2 and np.all(self.dtypes == object): raise TypeError( "Cannot interpolate with all object-dtype columns " "in the DataFrame. Try setting at least one " @@ -8041,18 +8055,8 @@ def interpolate( downcast=downcast, ) else: - obj, should_transpose = (self.T, True) if axis == 1 else (self, False) - if np.any(obj.dtypes == object): - # GH#53631 - warnings.warn( - f"{type(self).__name__}.interpolate with object dtype is " - "deprecated and will raise in a future version. Call " - "obj.infer_objects(copy=False) before interpolating instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) + # obj, should_transpose are already set index = missing.get_interp_index(method, obj.index) - new_data = obj._mgr.interpolate( method=method, index=index, From bf39371868722249607bb5c6585dc87bced64efa Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Sat, 8 Jul 2023 14:41:37 +0800 Subject: [PATCH 7/8] fix failing tests --- pandas/core/generic.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 018d97c529cdb..baa4fd0c81548 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7979,13 +7979,20 @@ def interpolate( if downcast is not None and downcast != "infer": raise ValueError("downcast must be either None or 'infer'") + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + + if self.empty: + if inplace: + return None + return self.copy() + if not isinstance(method, str): raise ValueError("'method' should be a string, not None.") fillna_methods = ["ffill", "bfill", "pad", "backfill"] if method.lower() in fillna_methods: # GH#53581 - # postpone setting obj, should_transpose warnings.warn( f"{type(self).__name__}.interpolate with method={method} is " "deprecated and will raise in a future version. " @@ -7993,11 +8000,12 @@ def interpolate( FutureWarning, stacklevel=find_stack_level(), ) + obj, should_transpose = self, False else: obj, should_transpose = (self.T, True) if axis == 1 else (self, False) if np.any(obj.dtypes == object): # GH#53631 - if not (self.ndim == 2 and np.all(self.dtypes == object)): + if not (obj.ndim == 2 and np.all(obj.dtypes == object)): # don't warn in cases that already raise warnings.warn( f"{type(self).__name__}.interpolate with object dtype is " @@ -8013,38 +8021,29 @@ def interpolate( f"{type(self).__name__}.interpolate" ) - inplace = validate_bool_kwarg(inplace, "inplace") - axis = self._get_axis_number(axis) - - if self.empty: - if inplace: - return None - return self.copy() - - if isinstance(self.index, MultiIndex) and method != "linear": + if isinstance(obj.index, MultiIndex) and method != "linear": raise ValueError( "Only `method=linear` interpolation is supported on MultiIndexes." ) - if self.ndim == 2 and np.all(self.dtypes == object): + limit_direction = missing.infer_limit_direction(limit_direction, method) + + if obj.ndim == 2 and np.all(obj.dtypes == object): raise TypeError( "Cannot interpolate with all object-dtype columns " "in the DataFrame. Try setting at least one " "column to a numeric dtype." ) - limit_direction = missing.infer_limit_direction(limit_direction, method) - if method.lower() in fillna_methods: # TODO(3.0): remove this case # TODO: warn/raise on limit_direction or kwargs which are ignored? # as of 2023-06-26 no tests get here with either if not self._mgr.is_single_block and axis == 1: + # GH#53898 if inplace: raise NotImplementedError() obj, axis, should_transpose = self.T, 1 - axis, True - else: - obj, should_transpose = self, False new_data = obj._mgr.pad_or_backfill( method=method, @@ -8055,7 +8054,6 @@ def interpolate( downcast=downcast, ) else: - # obj, should_transpose are already set index = missing.get_interp_index(method, obj.index) new_data = obj._mgr.interpolate( method=method, From 1c0fbb6b2ae805f3a0af49fcc4e8f6cbb9fc704f Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Mon, 10 Jul 2023 10:20:20 +0800 Subject: [PATCH 8/8] resolve conversation --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index baa4fd0c81548..59d22126ef0f0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8038,7 +8038,7 @@ def interpolate( if method.lower() in fillna_methods: # TODO(3.0): remove this case # TODO: warn/raise on limit_direction or kwargs which are ignored? - # as of 2023-06-26 no tests get here with either + # as of 2023-06-26 no tests get here with either if not self._mgr.is_single_block and axis == 1: # GH#53898 if inplace: