Skip to content

BUG: Timedelta(td64_out_of_bounds) silently overflowing #38965

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -198,14 +198,14 @@ Datetimelike
- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`)
- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`)
- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`, :issue:`38792`)
- Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype (:issue:`38792`)
- Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype or a ``timedelta`` object ouf of bounds for ``timedelta64[ns]`` dtype (:issue:`38792`, :issue:`38965`)
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
-

Timedelta
^^^^^^^^^

- Bug in constructing :class:`Timedelta` from ``np.timedelta64`` objects with non-nanosecond units that are out of bounds for ``timedelta64[ns]`` (:issue:`38965`)
-
-

Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ cdef extern from "numpy/ndarraytypes.h":
NPY_FR_ps
NPY_FR_fs
NPY_FR_as
NPY_FR_GENERIC

cdef extern from "src/datetime/np_datetime.h":
ctypedef struct pandas_timedeltastruct:
Expand Down
89 changes: 79 additions & 10 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,18 @@ PyDateTime_IMPORT

cimport pandas._libs.tslibs.util as util
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.conversion cimport cast_from_unit
from pandas._libs.tslibs.conversion cimport cast_from_unit, precision_from_unit
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
c_nat_strings as nat_strings,
checknull_with_nat,
)
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
cmp_scalar,
get_datetime64_unit,
get_timedelta64_value,
pandas_timedeltastruct,
td64_to_tdstruct,
)
Expand Down Expand Up @@ -156,7 +159,7 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
if isinstance(delta, _Timedelta):
delta = delta.value
if is_timedelta64_object(delta):
return delta.astype("timedelta64[ns]").item()
return get_timedelta64_value(ensure_td64ns(delta))
if is_integer_object(delta):
return delta
if PyDelta_Check(delta):
Expand All @@ -169,6 +172,72 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1:
raise TypeError(type(delta))


cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# generic -> default to nanoseconds
return "ns"
elif unit == NPY_DATETIMEUNIT.NPY_FR_us:
return "us"
elif unit == NPY_DATETIMEUNIT.NPY_FR_ms:
return "ms"
elif unit == NPY_DATETIMEUNIT.NPY_FR_s:
return "s"
elif unit == NPY_DATETIMEUNIT.NPY_FR_m:
return "m"
elif unit == NPY_DATETIMEUNIT.NPY_FR_h:
return "h"
elif unit == NPY_DATETIMEUNIT.NPY_FR_D:
return "D"
elif unit == NPY_DATETIMEUNIT.NPY_FR_W:
return "W"
elif unit == NPY_DATETIMEUNIT.NPY_FR_M:
return "M"
elif unit == NPY_DATETIMEUNIT.NPY_FR_Y:
return "Y"
else:
raise NotImplementedError(unit)


@cython.overflowcheck(True)
cdef object ensure_td64ns(object ts):
"""
Overflow-safe implementation of td64.astype("m8[ns]")

Parameters
----------
ts : np.timedelta64

Returns
-------
np.timedelta64[ns]
"""
cdef:
NPY_DATETIMEUNIT td64_unit
int64_t td64_value, mult
str unitstr

td64_unit = get_datetime64_unit(ts)
if (
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
unitstr = npy_unit_to_abbrev(td64_unit)

td64_value = get_timedelta64_value(ts)

mult = precision_from_unit(unitstr)[0]
try:
# NB: cython#1381 this cannot be *=
td64_value = td64_value * mult
except OverflowError as err:
from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta
raise OutOfBoundsTimedelta(ts)

return np.timedelta64(td64_value, "ns")

return ts


cdef convert_to_timedelta64(object ts, str unit):
"""
Convert an incoming object to a timedelta64 if possible.
Expand All @@ -184,37 +253,37 @@ cdef convert_to_timedelta64(object ts, str unit):
Return an ns based int64
"""
if checknull_with_nat(ts):
return np.timedelta64(NPY_NAT)
return np.timedelta64(NPY_NAT, "ns")
elif isinstance(ts, _Timedelta):
# already in the proper format
ts = np.timedelta64(ts.value)
ts = np.timedelta64(ts.value, "ns")
elif is_datetime64_object(ts):
# only accept a NaT here
if ts.astype('int64') == NPY_NAT:
return np.timedelta64(NPY_NAT)
elif is_timedelta64_object(ts):
ts = ts.astype(f"m8[{unit.lower()}]")
ts = ensure_td64ns(ts)
elif is_integer_object(ts):
if ts == NPY_NAT:
return np.timedelta64(NPY_NAT)
return np.timedelta64(NPY_NAT, "ns")
else:
if unit in ['Y', 'M', 'W']:
ts = np.timedelta64(ts, unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts)
ts = np.timedelta64(ts, "ns")
elif is_float_object(ts):
if unit in ['Y', 'M', 'W']:
ts = np.timedelta64(int(ts), unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts)
ts = np.timedelta64(ts, "ns")
elif isinstance(ts, str):
if len(ts) > 0 and ts[0] == 'P':
ts = parse_iso_format_string(ts)
else:
ts = parse_timedelta_string(ts)
ts = np.timedelta64(ts)
ts = np.timedelta64(ts, "ns")
elif is_tick_object(ts):
ts = np.timedelta64(ts.nanos, 'ns')

Expand Down Expand Up @@ -1196,7 +1265,7 @@ class Timedelta(_Timedelta):
elif is_timedelta64_object(value):
if unit is not None:
value = value.astype(f'timedelta64[{unit}]')
value = value.astype('timedelta64[ns]')
value = ensure_td64ns(value)
elif is_tick_object(value):
value = np.timedelta64(value.nanos, 'ns')
elif is_integer_object(value) or is_float_object(value):
Expand Down
20 changes: 16 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pandas._libs.tslibs import (
NaT,
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
Period,
Timedelta,
Timestamp,
Expand Down Expand Up @@ -743,8 +744,12 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
val = val.value

elif isinstance(val, (np.timedelta64, timedelta)):
val = Timedelta(val).value
dtype = np.dtype("m8[ns]")
try:
val = Timedelta(val).value
except (OutOfBoundsTimedelta, OverflowError):
dtype = np.dtype(object)
else:
dtype = np.dtype("m8[ns]")

elif is_bool(val):
dtype = np.dtype(np.bool_)
Expand Down Expand Up @@ -1386,7 +1391,7 @@ def try_timedelta(v):

try:
td_values = to_timedelta(v)
except ValueError:
except (ValueError, OverflowError):
return v.reshape(shape)
else:
return np.asarray(td_values).reshape(shape)
Expand Down Expand Up @@ -1618,8 +1623,16 @@ def construct_2d_arraylike_from_scalar(
value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
) -> np.ndarray:

shape = (length, width)

if dtype.kind in ["m", "M"]:
value = maybe_unbox_datetimelike(value, dtype)
elif dtype == object:
if isinstance(value, (np.timedelta64, np.datetime64)):
# calling np.array below would cast to pytimedelta/pydatetime
out = np.empty(shape, dtype=object)
out.fill(value)
return out

# Attempt to coerce to a numpy array
try:
Expand All @@ -1632,7 +1645,6 @@ def construct_2d_arraylike_from_scalar(
if arr.ndim != 0:
raise ValueError("DataFrame constructor not properly called!")

shape = (length, width)
return np.full(shape, arr)


Expand Down
28 changes: 17 additions & 11 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import pytest
import pytz

from pandas.compat.numpy import _np_version_under1p19, _np_version_under1p20
from pandas.compat.numpy import _np_version_under1p19

from pandas.core.dtypes.common import is_integer_dtype
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype
Expand Down Expand Up @@ -2371,16 +2371,10 @@ def test_from_timedelta_scalar_preserves_nanos(self, constructor):
def test_from_timestamp_scalar_preserves_nanos(self, constructor):
ts = Timestamp.now() + Timedelta(1)

obj = Series(ts, index=range(1), dtype="M8[ns]")
obj = constructor(ts, dtype="M8[ns]")
assert get1(obj) == ts

def test_from_timedelta64_scalar_object(self, constructor, request):
if getattr(constructor, "func", None) is DataFrame and _np_version_under1p20:
# getattr check means we only xfail when box is None
mark = pytest.mark.xfail(
reason="np.array(td64, dtype=object) converts to int"
)
request.node.add_marker(mark)
def test_from_timedelta64_scalar_object(self, constructor):

td = Timedelta(1)
td64 = td.to_timedelta64()
Expand All @@ -2407,8 +2401,20 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request):
with pytest.raises(TypeError, match="Cannot cast"):
constructor(scalar, dtype=dtype)

def test_from_out_of_bounds_datetime(self, constructor):
@pytest.mark.parametrize("cls", [datetime, np.datetime64])
def test_from_out_of_bounds_datetime(self, constructor, cls):
scalar = datetime(9999, 1, 1)
if cls is np.datetime64:
scalar = np.datetime64(scalar, "D")
result = constructor(scalar)

assert type(get1(result)) is cls

@pytest.mark.parametrize("cls", [timedelta, np.timedelta64])
def test_from_out_of_bounds_timedelta(self, constructor, cls):
scalar = datetime(9999, 1, 1) - datetime(1970, 1, 1)
if cls is np.timedelta64:
scalar = np.timedelta64(scalar, "D")
result = constructor(scalar)

assert type(get1(result)) is datetime
assert type(get1(result)) is cls
27 changes: 27 additions & 0 deletions pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._libs.tslibs import OutOfBoundsTimedelta

from pandas import Timedelta, offsets, to_timedelta


Expand Down Expand Up @@ -197,6 +199,31 @@ def test_overflow_on_construction():
Timedelta(timedelta(days=13 * 19999))


def test_construction_out_of_bounds_td64():
# TODO: parametrize over units just above/below the implementation bounds
# once GH#38964 is resolved

# Timedelta.max is just under 106752 days
td64 = np.timedelta64(106752, "D")
assert td64.astype("m8[ns]").view("i8") < 0 # i.e. naive astype will be wrong

msg = "106752 days"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta(td64)

# But just back in bounds and we are OK
assert Timedelta(td64 - 1) == td64 - 1

td64 *= -1
assert td64.astype("m8[ns]").view("i8") > 0 # i.e. naive astype will be wrong

with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timedelta(td64)

# But just back in bounds and we are OK
assert Timedelta(td64 + 1) == td64 + 1


@pytest.mark.parametrize(
"fmt,exp",
[
Expand Down
2 changes: 1 addition & 1 deletion pandas/util/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def rewrite_exception(old_name: str, new_name: str):
try:
yield
except Exception as err:
msg = err.args[0]
msg = str(err.args[0])
msg = msg.replace(old_name, new_name)
args: Tuple[str, ...] = (msg,)
if len(err.args) > 1:
Expand Down