diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fecfc956852e3..653082fa3b67a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -753,6 +753,7 @@ Other - Bug in :meth:`Series.to_frame` and :meth:`Index.to_frame` ignoring the ``name`` argument when ``name=None`` is explicitly passed (:issue:`44212`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`) - Bug in :meth:`FloatingArray.equals` failing to consider two arrays equal if they contain ``np.nan`` values (:issue:`44382`) +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`) - Bug in :meth:`DataFrame.diff` when passing a NumPy integer object instead of an ``int`` object (:issue:`44572`) - diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 74d785586b950..d69709bf9d06c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -36,6 +36,7 @@ is_1d_only_ea_dtype, is_dtype_equal, is_list_like, + needs_i8_conversion, ) from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( @@ -362,7 +363,28 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: if fill_value is lib.no_default: fill_value = None - if axis == 0 and self.ndim == 2 and self.nblocks > 1: + if ( + axis == 0 + and self.ndim == 2 + and ( + self.nblocks > 1 + or ( + # If we only have one block and we know that we can't + # keep the same dtype (i.e. the _can_hold_element check) + # then we can go through the reindex_indexer path + # (and avoid casting logic in the Block method). + # The exception to this (until 2.0) is datetimelike + # dtypes with integers, which cast. + not self.blocks[0]._can_hold_element(fill_value) + # TODO(2.0): remove special case for integer-with-datetimelike + # once deprecation is enforced + and not ( + lib.is_integer(fill_value) + and needs_i8_conversion(self.blocks[0].dtype) + ) + ) + ) + ): # GH#35488 we need to watch out for multi-block cases # We only get here with fill_value not-lib.no_default ncols = self.shape[0] diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index d8511581f0e94..9cd0b8bb5b315 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -331,6 +331,83 @@ def test_shift_dt64values_int_fill_deprecated(self): expected = DataFrame({"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}) tm.assert_frame_equal(result, expected) + # same thing but not consolidated + # This isn't great that we get different behavior, but + # that will go away when the deprecation is enforced + df3 = DataFrame({"A": ser}) + df3["B"] = ser + assert len(df3._mgr.arrays) == 2 + result = df3.shift(1, axis=1, fill_value=0) + expected = DataFrame({"A": [0, 0], "B": df2["A"]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "as_cat", + [ + pytest.param( + True, + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + False, + ], + ) + @pytest.mark.parametrize( + "vals", + [ + date_range("2020-01-01", periods=2), + date_range("2020-01-01", periods=2, tz="US/Pacific"), + pd.period_range("2020-01-01", periods=2, freq="D"), + pd.timedelta_range("2020 Days", periods=2, freq="D"), + pd.interval_range(0, 3, periods=2), + pytest.param( + pd.array([1, 2], dtype="Int64"), + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + pytest.param( + pd.array([1, 2], dtype="Float32"), + marks=pytest.mark.xfail( + reason="_can_hold_element incorrectly always returns True" + ), + ), + ], + ids=lambda x: str(x.dtype), + ) + def test_shift_dt64values_axis1_invalid_fill( + self, vals, as_cat, using_array_manager, request + ): + # GH#44564 + if using_array_manager: + mark = pytest.mark.xfail(raises=NotImplementedError) + request.node.add_marker(mark) + + ser = Series(vals) + if as_cat: + ser = ser.astype("category") + + df = DataFrame({"A": ser}) + result = df.shift(-1, axis=1, fill_value="foo") + expected = DataFrame({"A": ["foo", "foo"]}) + tm.assert_frame_equal(result, expected) + + # same thing but multiple blocks + df2 = DataFrame({"A": ser, "B": ser}) + df2._consolidate_inplace() + + result = df2.shift(-1, axis=1, fill_value="foo") + expected = DataFrame({"A": df2["B"], "B": ["foo", "foo"]}) + tm.assert_frame_equal(result, expected) + + # same thing but not consolidated + df3 = DataFrame({"A": ser}) + df3["B"] = ser + assert len(df3._mgr.arrays) == 2 + result = df3.shift(-1, axis=1, fill_value="foo") + tm.assert_frame_equal(result, expected) + def test_shift_axis1_categorical_columns(self): # GH#38434 ci = CategoricalIndex(["a", "b", "c"])