diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5c74965bffdd7..dd6ca8a0c969a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -733,6 +733,8 @@ Missing - Bug in :meth:`replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`) - Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nulllable boolean dtype and with ``skipna=False`` (:issue:`33253`) - Clarified documentation on interpolate with method =akima. The ``der`` parameter must be scalar or None (:issue:`33426`) +- :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns led to interpolation along indices and vice versa. Furthermore, interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` is identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`) +- Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ``ValueError``. The method is now independent of the type of column names (:issue:`33956`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85b6a8431617a..e9f7bf457cbfd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6881,30 +6881,42 @@ def interpolate( inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) + index = self._get_axis(axis) + + if isinstance(self.index, MultiIndex) and method != "linear": + raise ValueError( + "Only `method=linear` interpolation is supported on MultiIndexes." 
+ ) + + # for the methods backfill, bfill, pad, ffill limit_direction and limit_area + # are being ignored, see gh-26796 for more information + if method in ["backfill", "bfill", "pad", "ffill"]: + return self.fillna( + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + # Currently we need this to call the axis correctly inside the various + # interpolation methods if axis == 0: df = self else: df = self.T - if isinstance(df.index, MultiIndex) and method != "linear": - raise ValueError( - "Only `method=linear` interpolation is supported on MultiIndexes." - ) - - if df.ndim == 2 and np.all(df.dtypes == np.dtype(object)): + if self.ndim == 2 and np.all(self.dtypes == np.dtype(object)): raise TypeError( "Cannot interpolate with all object-dtype columns " "in the DataFrame. Try setting at least one " "column to a numeric dtype." ) - # create/use the index if method == "linear": # prior default index = np.arange(len(df.index)) else: - index = df.index methods = {"index", "values", "nearest", "time"} is_numeric_or_datetime = ( is_numeric_dtype(index.dtype) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 291a46cf03216..efb3d719016bb 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -202,7 +202,7 @@ def test_interp_leading_nans(self, check_scipy): result = df.interpolate(method="polynomial", order=1) tm.assert_frame_equal(result, expected) - def test_interp_raise_on_only_mixed(self): + def test_interp_raise_on_only_mixed(self, axis): df = DataFrame( { "A": [1, 2, np.nan, 4], @@ -212,8 +212,13 @@ def test_interp_raise_on_only_mixed(self): "E": [1, 2, 3, 4], } ) - with pytest.raises(TypeError): - df.interpolate(axis=1) + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." 
+ ) + with pytest.raises(TypeError, match=msg): + df.astype("object").interpolate(axis=axis) def test_interp_raise_on_all_object_dtype(self): # GH 22985 @@ -272,7 +277,6 @@ def test_interp_ignore_all_good(self): result = df[["B", "D"]].interpolate(downcast=None) tm.assert_frame_equal(result, df[["B", "D"]]) - @pytest.mark.parametrize("axis", [0, 1]) def test_interp_time_inplace_axis(self, axis): # GH 9687 periods = 5 @@ -296,3 +300,17 @@ def test_interp_string_axis(self, axis_name, axis_number): result = df.interpolate(method="linear", axis=axis_name) expected = df.interpolate(method="linear", axis=axis_number) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"]) + def test_interp_fillna_methods(self, axis, method): + # GH 12918 + df = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], + "B": [2.0, 4.0, 6.0, np.nan, 8.0, 10.0], + "C": [3.0, 6.0, 9.0, np.nan, np.nan, 30.0], + } + ) + expected = df.fillna(axis=axis, method=method) + result = df.interpolate(method=method, axis=axis) + tm.assert_frame_equal(result, expected)