Skip to content

Backport PR #32591 on branch 1.0.x (REG: dt64 shift with integer fill_value) #32647

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Fixed regressions
- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`)
- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`)
- Fixed regression in :meth:`DataFrame.reindex_like` where a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`)
- Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`)


.. ---------------------------------------------------------------------------
Expand Down
51 changes: 51 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,57 @@ def _from_factorized(cls, values, original):
def _values_for_argsort(self):
    """Return the backing ``_data`` attribute as the values to argsort on."""
    return self._data

@Appender(ExtensionArray.shift.__doc__)
def shift(self, periods=1, fill_value=None, axis=0):
    # No docstring is written here: the ``Appender`` decorator above is
    # handed ``ExtensionArray.shift.__doc__`` for that purpose.

    # An empty array or a zero shift has nothing to move; hand back a copy.
    if not self.size or periods == 0:
        return self.copy()

    if is_valid_nat_for_dtype(fill_value, self.dtype):
        # Values accepted as NaT for this dtype are normalised to ``NaT``.
        fill_value = NaT
    elif not isinstance(fill_value, self._recognized_scalars):
        # Coerce before warning, so that a value which cannot be coerced
        # raises without a deprecation warning being emitted first.
        wants_ordinal = self._scalar_type is Period and lib.is_integer(fill_value)
        if wants_ordinal:
            # kludge for #31971 since Period(integer) tries to cast to str
            coerced = Period._from_ordinal(fill_value, freq=self.freq)
        else:
            coerced = self._scalar_type(fill_value)

        message = (
            f"Passing {type(fill_value)} to shift is deprecated and "
            "will raise in a future version, pass "
            f"{self._scalar_type.__name__} instead."
        )
        # The warning must be issued from this frame: the stacklevel is
        # chosen to be correct when called from DataFrame.shift or
        # Series.shift.
        warnings.warn(message, FutureWarning, stacklevel=7)
        fill_value = coerced

    fill_value = self._unbox_scalar(fill_value)

    data = self._data

    # np.roll should be given a C-contiguous array, so F-contiguous data is
    # transposed first and the axis is mirrored to match.
    transposed = data.flags.f_contiguous
    if transposed:
        data = data.T
        axis = data.ndim - axis - 1

    rolled = np.roll(data, periods, axis=axis)

    # np.roll wraps elements around; overwrite the wrapped-in positions
    # along ``axis`` with the fill value.
    indexer = [slice(None)] * self.ndim
    indexer[axis] = slice(None, periods) if periods > 0 else slice(periods, None)
    rolled[tuple(indexer)] = fill_value

    # Undo the transpose so the result has the original layout.
    if transposed:
        rolled = rolled.T

    return type(self)._simple_new(rolled, dtype=self.dtype)

# ------------------------------------------------------------------
# Additional array methods
# These are not part of the EA API, but we implement them because
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1897,10 +1897,7 @@ def diff(self, n: int, axis: int = 1) -> List["Block"]:
return super().diff(n, axis)

def shift(
self,
periods: int,
axis: libinternals.BlockPlacement = 0,
fill_value: Any = None,
self, periods: int, axis: int = 0, fill_value: Any = None,
) -> List["ExtensionBlock"]:
"""
Shift the block by `periods`.
Expand Down Expand Up @@ -2150,14 +2147,20 @@ def get_values(self, dtype=None):

def iget(self, key):
    """
    Look up ``key`` via the parent ``iget`` and box raw numpy scalars.

    A ``np.datetime64`` result is wrapped in ``Timestamp`` and a
    ``np.timedelta64`` result in ``Timedelta``; anything else is returned
    as the parent produced it.
    """
    # GH#31649 we need to wrap scalars in Timestamp/Timedelta
    # TODO(EA2D): this can be removed if we ever have 2D EA
    value = super().iget(key)
    if isinstance(value, np.datetime64):
        return Timestamp(value)
    if isinstance(value, np.timedelta64):
        return Timedelta(value)
    return value

def shift(self, periods, axis=0, fill_value=None):
    """
    Shift by delegating to the ``shift`` of ``self.array_values()``.

    The shifted values are re-wrapped with ``make_block_same_class``.
    """
    # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
    shifted = self.array_values().shift(periods, fill_value=fill_value, axis=axis)
    return self.make_block_same_class(shifted)


class DatetimeBlock(DatetimeLikeBlockMixin, Block):
__slots__ = ()
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,23 @@ def test_inplace_arithmetic(self):
arr -= pd.Timedelta(days=1)
tm.assert_equal(arr, expected)

def test_shift_fill_int_deprecated(self):
    """Shifting with an integer ``fill_value`` warns and fills with the boxed scalar."""
    # GH#31971
    step = 24 * 3600 * 10 ** 9
    i8values = np.arange(10, dtype="i8") * step
    arr = self.array_cls(i8values, freq="D")

    # The stacklevel is tuned for DataFrame/Series callers, so it is not
    # checked when calling the array method directly.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        shifted = arr.shift(1, fill_value=1)

    # Expected: the integer 1 converted to the array's scalar type in the
    # first slot, followed by every original element except the last.
    if self.array_cls is PeriodArray:
        first = PeriodArray._scalar_type._from_ordinal(1, freq=arr.freq)
    else:
        first = arr._scalar_type(1)

    expected = arr.copy()
    expected[0] = first
    expected[1:] = arr[:-1]
    tm.assert_equal(shifted, expected)


class TestDatetimeArray(SharedTests):
index_cls = pd.DatetimeIndex
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/frame/methods/test_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,26 @@ def test_tshift(self, datetime_frame):
msg = "Freq was not given and was not set in the index"
with pytest.raises(ValueError, match=msg):
no_freq.tshift()

def test_shift_dt64values_int_fill_deprecated(self):
    """``DataFrame.shift`` on Timestamp values warns for an integer fill, on either axis."""
    # GH#31971
    stamps = [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]
    ser = pd.Series(stamps)
    frame = ser.to_frame()

    with tm.assert_produces_warning(FutureWarning):
        shifted_rows = frame.shift(1, fill_value=0)

    # The integer 0 is expected to come through as Timestamp(0).
    expected_rows = pd.Series([pd.Timestamp(0), ser[0]]).to_frame()
    tm.assert_frame_equal(shifted_rows, expected_rows)

    # axis = 1
    wide = pd.DataFrame({"A": ser, "B": ser})
    wide._consolidate_inplace()

    with tm.assert_produces_warning(FutureWarning):
        shifted_cols = wide.shift(1, axis=1, fill_value=0)

    # Column "A" is entirely filled; column "B" receives the old "A".
    filled_column = [pd.Timestamp(0), pd.Timestamp(0)]
    expected_cols = pd.DataFrame({"A": filled_column, "B": wide["A"]})
    tm.assert_frame_equal(shifted_cols, expected_cols)
10 changes: 10 additions & 0 deletions pandas/tests/series/methods/test_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,13 @@ def test_shift_categorical(self):

tm.assert_index_equal(s.values.categories, sp1.values.categories)
tm.assert_index_equal(s.values.categories, sn2.values.categories)

def test_shift_dt64values_int_fill_deprecated(self):
    """``Series.shift`` on Timestamp values warns when given an integer fill."""
    # GH#31971
    stamps = [pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]
    ser = pd.Series(stamps)

    with tm.assert_produces_warning(FutureWarning):
        shifted = ser.shift(1, fill_value=0)

    # The integer 0 is expected to come through as Timestamp(0), ahead of
    # the original first element.
    expected = pd.Series([pd.Timestamp(0), ser[0]])
    tm.assert_series_equal(shifted, expected)
tm.assert_series_equal(result, expected)