Skip to content

Commit a142ad7

Browse files
authored
BUG: DataFrame.diff(axis=1) with mixed (or EA) dtypes (#32995)
1 parent 185a654 commit a142ad7

File tree

4 files changed

+66
-12
lines changed

4 files changed

+66
-12
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,8 @@ Numeric
413413
- Bug in :meth:`to_numeric` with ``downcast="unsigned"`` fails for empty data (:issue:`32493`)
414414
- Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`)
415415
- Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`)
416+
- Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`)
417+
-
416418

417419
Conversion
418420
^^^^^^^^^^

pandas/core/frame.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6774,6 +6774,11 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> "DataFrame":
67746774
5 NaN NaN NaN
67756775
"""
67766776
bm_axis = self._get_block_manager_axis(axis)
6777+
self._consolidate_inplace()
6778+
6779+
if bm_axis == 0 and periods != 0:
6780+
return self.T.diff(periods, axis=0).T
6781+
67776782
new_data = self._mgr.diff(n=periods, axis=bm_axis)
67786783
return self._constructor(new_data)
67796784

pandas/core/internals/blocks.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1773,7 +1773,14 @@ def interpolate(
17731773
)
17741774

17751775
def diff(self, n: int, axis: int = 1) -> List["Block"]:
1776+
if axis == 0 and n != 0:
1777+
# n==0 case will be a no-op so let it fall through
1778+
# Since we only have one column, the result will be all-NA.
1779+
# Create this result by shifting along axis=0 past the length of
1780+
# our values.
1781+
return super().diff(len(self.values), axis=0)
17761782
if axis == 1:
1783+
# TODO(EA2D): unnecessary with 2D EAs
17771784
# we are by definition 1D.
17781785
axis = 0
17791786
return super().diff(n, axis)

pandas/tests/frame/methods/test_diff.py

Lines changed: 52 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,15 @@ def test_diff_datetime_axis1(self, tz):
6464
1: date_range("2010", freq="D", periods=2, tz=tz),
6565
}
6666
)
67-
if tz is None:
68-
result = df.diff(axis=1)
69-
expected = DataFrame(
70-
{
71-
0: pd.TimedeltaIndex(["NaT", "NaT"]),
72-
1: pd.TimedeltaIndex(["0 days", "0 days"]),
73-
}
74-
)
75-
tm.assert_frame_equal(result, expected)
76-
else:
77-
with pytest.raises(NotImplementedError):
78-
result = df.diff(axis=1)
67+
68+
result = df.diff(axis=1)
69+
expected = DataFrame(
70+
{
71+
0: pd.TimedeltaIndex(["NaT", "NaT"]),
72+
1: pd.TimedeltaIndex(["0 days", "0 days"]),
73+
}
74+
)
75+
tm.assert_frame_equal(result, expected)
7976

8077
def test_diff_timedelta(self):
8178
# GH#4533
@@ -118,3 +115,46 @@ def test_diff_axis(self):
118115
tm.assert_frame_equal(
119116
df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])
120117
)
118+
119+
@pytest.mark.xfail(
120+
reason="GH#32995 needs to operate column-wise or do inference",
121+
raises=AssertionError,
122+
)
123+
def test_diff_period(self):
124+
# GH#32995 Don't pass an incorrect axis
125+
# TODO(EA2D): this bug wouldn't have happened with 2D EA
126+
pi = pd.date_range("2016-01-01", periods=3).to_period("D")
127+
df = pd.DataFrame({"A": pi})
128+
129+
result = df.diff(1, axis=1)
130+
131+
# TODO: should we make Block.diff do type inference? or maybe algos.diff?
132+
expected = (df - pd.NaT).astype(object)
133+
tm.assert_frame_equal(result, expected)
134+
135+
def test_diff_axis1_mixed_dtypes(self):
136+
# GH#32995 operate column-wise when we have mixed dtypes and axis=1
137+
df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
138+
139+
expected = pd.DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2})
140+
141+
result = df.diff(axis=1)
142+
tm.assert_frame_equal(result, expected)
143+
144+
def test_diff_axis1_mixed_dtypes_large_periods(self):
145+
# GH#32995 operate column-wise when we have mixed dtypes and axis=1
146+
df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
147+
148+
expected = df * np.nan
149+
150+
result = df.diff(axis=1, periods=3)
151+
tm.assert_frame_equal(result, expected)
152+
153+
def test_diff_axis1_mixed_dtypes_negative_periods(self):
154+
# GH#32995 operate column-wise when we have mixed dtypes and axis=1
155+
df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})
156+
157+
expected = pd.DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan})
158+
159+
result = df.diff(axis=1, periods=-1)
160+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)