From 56dd682f294f20b37abdf9a4155b5a1b0f5bf4c2 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Nov 2021 09:20:09 -0800 Subject: [PATCH 1/4] BUG: DataFrame.shift with axis=1 and mismatched fill_value --- pandas/core/internals/managers.py | 18 +++++- pandas/tests/frame/methods/test_shift.py | 70 ++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 74d785586b950..04755f2955a4e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -36,6 +36,7 @@ is_1d_only_ea_dtype, is_dtype_equal, is_list_like, + needs_i8_conversion, ) from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( @@ -362,7 +363,22 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: if fill_value is lib.no_default: fill_value = None - if axis == 0 and self.ndim == 2 and self.nblocks > 1: + if ( + axis == 0 + and self.ndim == 2 + and ( + self.nblocks > 1 + or ( + not self.blocks[0]._can_hold_element(fill_value) + # TODO(2.0): remove special case for integer-with-datetimelike + # once deprecation is enforced + and not ( + lib.is_integer(fill_value) + and needs_i8_conversion(self.blocks[0].dtype) + ) + ) + ) + ): # GH#35488 we need to watch out for multi-block cases # We only get here with fill_value not-lib.no_default ncols = self.shape[0] diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index d8511581f0e94..2398ab667aba8 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -331,6 +331,76 @@ def test_shift_dt64values_int_fill_deprecated(self): expected = DataFrame({"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}) tm.assert_frame_equal(result, expected) + # same thing but not consolidated + # This isn't great that we get different behavior, but + # that will go away when the deprecation is enforced + df3 = DataFrame({"A": ser}) + df3["B"] = ser + assert len(df3._mgr.arrays) == 2 + result = df3.shift(1, axis=1, fill_value=0) + expected = DataFrame({"A": [0, 0], "B": df2["A"]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "as_cat", + [ + pytest.param( + True, + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + False, + ], + ) + @pytest.mark.parametrize( + "vals", + [ + date_range("2020-01-01", periods=2), + date_range("2020-01-01", periods=2, tz="US/Pacific"), + pd.period_range("2020-01-01", periods=2, freq="D"), + pd.timedelta_range("2020 Days", periods=2, freq="D"), + pd.interval_range(0, 3, periods=2), + pytest.param( + pd.array([1, 2], dtype="Int64"), + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + pytest.param( + pd.array([1, 2], dtype="Float32"), + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + ], + ids=lambda x: str(x.dtype), + ) + def test_shift_dt64values_axis1_invalid_fill(self, vals, as_cat): + ser = Series(vals) + if as_cat: + ser = ser.astype("category") + + df = DataFrame({"A": ser}) + result = df.shift(-1, axis=1, fill_value="foo") + expected = DataFrame({"A": ["foo", "foo"]}) + tm.assert_frame_equal(result, expected) + + # same thing but multiple blocks + df2 = DataFrame({"A": ser, "B": ser}) + df2._consolidate_inplace() + + result = df2.shift(-1, axis=1, fill_value="foo") + expected = DataFrame({"A": df2["B"], "B": ["foo", "foo"]}) + tm.assert_frame_equal(result, expected) + + # same thing but not consolidated + df3 = DataFrame({"A": ser}) + df3["B"] = ser + assert len(df3._mgr.arrays) == 2 + result = df3.shift(-1, axis=1, fill_value="foo") + tm.assert_frame_equal(result, expected) + def test_shift_axis1_categorical_columns(self): # GH#38434 ci = CategoricalIndex(["a", "b", "c"]) From d3284065a21aeed754a6df0d2cbe6e076d1c8a80 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Nov 2021 09:22:12 -0800 Subject: [PATCH 2/4] xfail for ArrayManager --- pandas/tests/frame/methods/test_shift.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 2398ab667aba8..6e633279e9c32 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -376,7 +376,13 @@ def test_shift_dt64values_int_fill_deprecated(self): ], ids=lambda x: str(x.dtype), ) - def test_shift_dt64values_axis1_invalid_fill(self, vals, as_cat): + def test_shift_dt64values_axis1_invalid_fill( + self, vals, as_cat, using_array_manager, request + ): + if using_array_manager: + mark = pytest.mark.xfail(raises=NotImplementedError) + request.node.add_marker(mark) + ser = Series(vals) if as_cat: ser = ser.astype("category") From f6dabad5742936461311f6cad9e551e80ef5992b Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Nov 2021 16:36:25 -0800 Subject: [PATCH 3/4] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1f656f267783f..3fe5c05d92c4b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -741,6 +741,7 @@ Other - Bug in :meth:`Series.to_frame` and :meth:`Index.to_frame` ignoring the ``name`` argument when ``name=None`` is explicitly passed (:issue:`44212`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`) - Bug in :meth:`FloatingArray.equals` failing to consider two arrays equal if they contain ``np.nan`` values (:issue:`44382`) +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` incorrectly raising (:issue:`44564`) - .. ***DO NOT USE THIS SECTION*** From b399a92fadf7776c2a0cf80d332a93ab992f4e7b Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Nov 2021 16:37:28 -0800 Subject: [PATCH 4/4] GH ref, reword whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/tests/frame/methods/test_shift.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 3fe5c05d92c4b..c9931622ca7cc 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -741,7 +741,7 @@ Other - Bug in :meth:`Series.to_frame` and :meth:`Index.to_frame` ignoring the ``name`` argument when ``name=None`` is explicitly passed (:issue:`44212`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`) - Bug in :meth:`FloatingArray.equals` failing to consider two arrays equal if they contain ``np.nan`` values (:issue:`44382`) -- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` incorrectly raising (:issue:`44564`) +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`) - .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 6e633279e9c32..9cd0b8bb5b315 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -379,6 +379,7 @@ def test_shift_dt64values_int_fill_deprecated(self): def test_shift_dt64values_axis1_invalid_fill( self, vals, as_cat, using_array_manager, request ): + # GH#44564 if using_array_manager: mark = pytest.mark.xfail(raises=NotImplementedError) request.node.add_marker(mark)