From 87ff829b6fb6bfe5f22041fdb496297afd1e751d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 10 Mar 2020 11:57:52 -0700 Subject: [PATCH 1/3] Implement DatetimelikeArrayMixin.shift --- pandas/core/arrays/datetimelike.py | 50 +++++++++++++++++++++++ pandas/core/internals/blocks.py | 13 +++--- pandas/tests/arrays/test_datetimelike.py | 17 ++++++++ pandas/tests/frame/methods/test_shift.py | 23 +++++++++++ pandas/tests/series/methods/test_shift.py | 10 +++++ 5 files changed, 108 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8c870c6255200..1cdac30672ce4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -745,6 +745,56 @@ def _from_factorized(cls, values, original): def _values_for_argsort(self): return self._data + def shift(self, periods=1, fill_value=None, axis=0): + if not self.size or periods == 0: + return self.copy() + + if is_valid_nat_for_dtype(fill_value, self.dtype): + fill_value = NaT + elif not isinstance(fill_value, self._recognized_scalars): + # only warn if we're not going to raise + if self._scalar_type is Period and lib.is_integer(fill_value): + # kludge for #31971 + new_fill = Period._from_ordinal(fill_value, freq=self.freq) + else: + new_fill = self._scalar_type(fill_value) + + # stacklevel here is chosen to be correct when called from + # DataFrame.shift or Series.shift + warnings.warn( + f"Passing {type(fill_value)} to shift is deprecated and " + "will raise in a future version, pass " + f"{self._scalar_type.__name__} instead.", + FutureWarning, + stacklevel=7, + ) + fill_value = new_fill + + fill_value = self._unbox_scalar(fill_value) + + new_values = self._data + + # make sure array sent to np.roll is c_contiguous + f_ordered = new_values.flags.f_contiguous + if f_ordered: + new_values = new_values.T + axis = new_values.ndim - axis - 1 + + new_values = np.roll(new_values, periods, axis=axis) + + axis_indexer = [slice(None)] * self.ndim + if periods > 0: + axis_indexer[axis] = slice(None, periods) + else: + axis_indexer[axis] = slice(periods, None) + new_values[tuple(axis_indexer)] = fill_value + + # restore original order + if f_ordered: + new_values = new_values.T + + return type(self)._simple_new(new_values, dtype=self.dtype) + # ------------------------------------------------------------------ # Additional array methods # These are not part of the EA API, but we implement them because diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c088b7020927b..145edee170ba9 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1916,10 +1916,7 @@ def diff(self, n: int, axis: int = 1) -> List["Block"]: return super().diff(n, axis) def shift( - self, - periods: int, - axis: libinternals.BlockPlacement = 0, - fill_value: Any = None, + self, periods: int, axis: int = 0, fill_value: Any = None, ) -> List["ExtensionBlock"]: """ Shift the block by `periods`. @@ -2172,7 +2169,7 @@ def internal_values(self): def iget(self, key): # GH#31649 we need to wrap scalars in Timestamp/Timedelta - # TODO: this can be removed if we ever have 2D EA + # TODO(EA2D): this can be removed if we ever have 2D EA result = super().iget(key) if isinstance(result, np.datetime64): result = Timestamp(result) @@ -2180,6 +2177,12 @@ def iget(self, key): result = Timedelta(result) return result + def shift(self, periods, axis=0, fill_value=None): + # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs + values = self.array_values() + new_values = values.shift(periods, fill_value=fill_value, axis=axis) + return self.make_block_same_class(new_values) + class DatetimeBlock(DatetimeLikeBlockMixin, Block): __slots__ = () diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index f99ee542d543c..b8a70752330c5 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -240,6 +240,23 @@ def test_inplace_arithmetic(self): arr -= pd.Timedelta(days=1) tm.assert_equal(arr, expected) + def test_shift_fill_int_deprecated(self): + # GH#31971 + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = arr.shift(1, fill_value=1) + + expected = arr.copy() + if self.array_cls is PeriodArray: + fill_val = PeriodArray._scalar_type._from_ordinal(1, freq=arr.freq) + else: + fill_val = arr._scalar_type(1) + expected[0] = fill_val + expected[1:] = arr[:-1] + tm.assert_equal(result, expected) + class TestDatetimeArray(SharedTests): index_cls = pd.DatetimeIndex diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index cfb17de892b1c..f6c89172bbf86 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -185,3 +185,26 @@ def test_tshift(self, datetime_frame): msg = "Freq was not given and was not set in the index" with pytest.raises(ValueError, match=msg): no_freq.tshift() + + def test_shift_dt64values_int_fill_deprecated(self): + # GH#31971 + ser = pd.Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]) + df = ser.to_frame() + + with tm.assert_produces_warning(FutureWarning): + result = df.shift(1, fill_value=0) + + expected = pd.Series([pd.Timestamp(0), ser[0]]).to_frame() + tm.assert_frame_equal(result, expected) + + # axis = 1 + df2 = pd.DataFrame({"A": ser, "B": ser}) + df2._consolidate_inplace() + + with tm.assert_produces_warning(FutureWarning): + result = df2.shift(1, axis=1, fill_value=0) + + expected = pd.DataFrame( + {"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]} + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_shift.py b/pandas/tests/series/methods/test_shift.py index 8256e2f33b936..e8d7f5958d0a1 100644 --- a/pandas/tests/series/methods/test_shift.py +++ b/pandas/tests/series/methods/test_shift.py @@ -263,3 +263,13 @@ def test_shift_categorical(self): tm.assert_index_equal(s.values.categories, sp1.values.categories) tm.assert_index_equal(s.values.categories, sn2.values.categories) + + def test_shift_dt64values_int_fill_deprecated(self): + # GH#31971 + ser = pd.Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]) + + with tm.assert_produces_warning(FutureWarning): + result = ser.shift(1, fill_value=0) + + expected = pd.Series([pd.Timestamp(0), ser[0]]) + tm.assert_series_equal(result, expected) From 8bc5cb1c55fa9c9bb4e734df6794e0e5d08dc5c6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Mar 2020 14:03:07 -0700 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.0.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index bc234bbd31662..1f04cf69d5666 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -28,6 +28,7 @@ Fixed regressions - Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) - Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) - Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) +- Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`) .. --------------------------------------------------------------------------- From 56f0f4b7c6cfc2635db728b3042abc6b4336c513 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Mar 2020 14:06:32 -0700 Subject: [PATCH 3/3] docstring --- pandas/core/arrays/datetimelike.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1cdac30672ce4..105d9581b1a25 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -745,6 +745,7 @@ def _from_factorized(cls, values, original): def _values_for_argsort(self): return self._data + @Appender(ExtensionArray.shift.__doc__) def shift(self, periods=1, fill_value=None, axis=0): if not self.size or periods == 0: return self.copy() @@ -754,7 +755,7 @@ def shift(self, periods=1, fill_value=None, axis=0): elif not isinstance(fill_value, self._recognized_scalars): # only warn if we're not going to raise if self._scalar_type is Period and lib.is_integer(fill_value): - # kludge for #31971 + # kludge for #31971 since Period(integer) tries to cast to str new_fill = Period._from_ordinal(fill_value, freq=self.freq) else: new_fill = self._scalar_type(fill_value)