diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5c39377899a20..35d6e867ff86f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -410,6 +410,8 @@ Numeric - Bug in :meth:`to_numeric` with ``downcast="unsigned"`` fails for empty data (:issue:`32493`) - Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`) - Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`) +- Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`) +- Conversion ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c3018861bce57..07aa0521b2bec 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6768,6 +6768,11 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> "DataFrame": 5 NaN NaN NaN """ bm_axis = self._get_block_manager_axis(axis) + self._consolidate_inplace() + + if bm_axis == 0 and periods != 0: + return self.T.diff(periods, axis=0).T + new_data = self._mgr.diff(n=periods, axis=bm_axis) return self._constructor(new_data) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d12b78f8d046f..6657465548ebf 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1775,7 +1775,14 @@ def interpolate( ) def diff(self, n: int, axis: int = 1) -> List["Block"]: + if axis == 0 and n != 0: + # n==0 case will be a no-op so let it fall through + # Since we only have one column, the result will be all-NA. + # Create this result by shifting along axis=0 past the length of + # our values. + return super().diff(len(self.values), axis=0) if axis == 1: + # TODO(EA2D): unnecessary with 2D EAs # we are by definition 1D. 
axis = 0 return super().diff(n, axis) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index ffdb6d41ebda5..6a9248e1cba1e 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -64,18 +64,15 @@ def test_diff_datetime_axis1(self, tz): 1: date_range("2010", freq="D", periods=2, tz=tz), } ) - if tz is None: - result = df.diff(axis=1) - expected = DataFrame( - { - 0: pd.TimedeltaIndex(["NaT", "NaT"]), - 1: pd.TimedeltaIndex(["0 days", "0 days"]), - } - ) - tm.assert_frame_equal(result, expected) - else: - with pytest.raises(NotImplementedError): - result = df.diff(axis=1) + + result = df.diff(axis=1) + expected = DataFrame( + { + 0: pd.TimedeltaIndex(["NaT", "NaT"]), + 1: pd.TimedeltaIndex(["0 days", "0 days"]), + } + ) + tm.assert_frame_equal(result, expected) def test_diff_timedelta(self): # GH#4533 @@ -118,3 +115,46 @@ def test_diff_axis(self): tm.assert_frame_equal( df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]]) ) + + @pytest.mark.xfail( + reason="GH#32995 needs to operate column-wise or do inference", + raises=AssertionError, + ) + def test_diff_period(self): + # GH#32995 Don't pass an incorrect axis + # TODO(EA2D): this bug wouldn't have happened with 2D EA + pi = pd.date_range("2016-01-01", periods=3).to_period("D") + df = pd.DataFrame({"A": pi}) + + result = df.diff(1, axis=1) + + # TODO: should we make Block.diff do type inference? or maybe algos.diff? 
+ expected = (df - pd.NaT).astype(object) + tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = pd.DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2}) + + result = df.diff(axis=1) + tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes_large_periods(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = df * np.nan + + result = df.diff(axis=1, periods=3) + tm.assert_frame_equal(result, expected) + + def test_diff_axis1_mixed_dtypes_negative_periods(self): + # GH#32995 operate column-wise when we have mixed dtypes and axis=1 + df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)}) + + expected = pd.DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan}) + + result = df.diff(axis=1, periods=-1) + tm.assert_frame_equal(result, expected)