Skip to content

BUG: avoid overflow in Bday generate_range, closes #24252 #26651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ Datetimelike
- Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, :class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``levels`` parameter was ignored. (:issue:`26675`)
- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'``
- Bug when comparing a :class:`PeriodIndex` against a zero-dimensional numpy array (:issue:`26689`)
- Bug in :func:`date_range` where an ``OverflowError`` was raised unnecessarily for dates very close to the minimum or maximum representable timestamp (:issue:`24252`, :issue:`26651`)

Timedelta
^^^^^^^^^
Expand Down
9 changes: 9 additions & 0 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
- iso8601 string object
- python datetime object
- another timestamp object

Raises
------
OutOfBoundsDatetime : ts cannot be converted within implementation bounds
"""
cdef:
_TSObject obj
Expand All @@ -294,6 +298,11 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
if obj.value != NPY_NAT:
dt64_to_dtstruct(obj.value, &obj.dts)
elif is_integer_object(ts):
try:
ts = <int64_t>ts
except OverflowError:
# GH#26651 re-raise as OutOfBoundsDatetime
raise OutOfBoundsDatetime(ts)
if ts == NPY_NAT:
obj.value = NPY_NAT
else:
Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import numpy as np
import pytest

from pandas.errors import NullFrequencyError, PerformanceWarning
from pandas.errors import (
NullFrequencyError, OutOfBoundsDatetime, PerformanceWarning)

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -479,10 +480,10 @@ def test_tdi_add_timestamp_nat_masking(self):

def test_tdi_add_overflow(self):
# See GH#14068
msg = "too (big|large) to convert"
with pytest.raises(OverflowError, match=msg):
# preliminary test scalar analogue of vectorized tests below
with pytest.raises(OutOfBoundsDatetime):
pd.to_timedelta(106580, 'D') + Timestamp('2000')
with pytest.raises(OverflowError, match=msg):
with pytest.raises(OutOfBoundsDatetime):
Timestamp('2000') + pd.to_timedelta(106580, 'D')

_NaT = int(pd.NaT) + 1
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import numpy as np
import numpy.ma as ma
import numpy.ma.mrecords as mrecords
import pytest

from pandas.compat import PY36, is_platform_little_endian
Expand Down Expand Up @@ -839,7 +840,7 @@ def test_constructor_maskedrecarray_dtype(self):
data = np.ma.array(
np.ma.zeros(5, dtype=[('date', '<f8'), ('price', '<f8')]),
mask=[False] * 5)
data = data.view(ma.mrecords.mrecarray)
data = data.view(mrecords.mrecarray)
result = pd.DataFrame(data, dtype=int)
expected = pd.DataFrame(np.zeros((5, 2), dtype=int),
columns=['date', 'price'])
Expand Down Expand Up @@ -868,7 +869,7 @@ def test_constructor_mrecarray(self):
# call assert_frame_equal for all selections of 3 arrays
for comb in itertools.combinations(arrays, 3):
names, data = zip(*comb)
mrecs = ma.mrecords.fromarrays(data, names=names)
mrecs = mrecords.fromarrays(data, names=names)

# fill the comb
comb = {k: (v.filled() if hasattr(v, 'filled') else v)
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,19 @@ def test_bdays_and_open_boundaries(self, closed):
expected = pd.date_range(bday_start, bday_end, freq='D')
tm.assert_index_equal(result, expected)

def test_bday_near_overflow(self):
    # GH#24252: a one-period business-day range anchored on the last
    # whole day before Timestamp.max should be returned as-is; the
    # follow-up addition _would_ overflow and is not needed here
    last_day = pd.Timestamp.max.floor("D").to_pydatetime()
    expected = pd.DatetimeIndex([last_day], freq='B')

    result = pd.date_range(last_day, end=None, periods=1, freq='B')

    tm.assert_index_equal(result, expected)

def test_bday_overflow_error(self):
    # GH#24252: requesting two business days starting from the last
    # whole day before Timestamp.max must fail with
    # OutOfBoundsDatetime and not with OverflowError
    last_day = pd.Timestamp.max.floor("D").to_pydatetime()

    with pytest.raises(OutOfBoundsDatetime):
        pd.date_range(last_day, periods=2, freq='B')


class TestCustomDateRange:

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,13 @@ def test_invalid_date_kwarg_with_string_input(self, arg):
with pytest.raises(ValueError):
Timestamp('2010-10-10 12:59:59.999999999', **kwarg)

def test_out_of_bounds_integer_value(self):
    # GH#26651: an integer twice the size of either extreme Timestamp
    # value must raise OutOfBoundsDatetime, not OverflowError
    for extreme in (Timestamp.max, Timestamp.min):
        too_far = extreme.value * 2
        with pytest.raises(OutOfBoundsDatetime):
            Timestamp(too_far)

def test_out_of_bounds_value(self):
one_us = np.timedelta64(1).astype('timedelta64[us]')

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/tseries/offsets/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_apply_out_of_range(self, tz_naive_fixture):
assert t.tzinfo == result.tzinfo

except OutOfBoundsDatetime:
raise
pass
except (ValueError, KeyError):
# we are creating an invalid offset
# so ignore
Expand Down
14 changes: 13 additions & 1 deletion pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def wrapper(self, other):
if tz is not None and result.tzinfo is None:
result = conversion.localize_pydatetime(result, tz)

result = Timestamp(result)

return result
return wrapper

Expand Down Expand Up @@ -2330,7 +2332,7 @@ def apply(self, other):
# an exception, when we call using the + operator,
# we directly call the known method
result = other.__add__(self)
if result == NotImplemented:
if result is NotImplemented:
raise OverflowError
return result
elif isinstance(other, (datetime, np.datetime64, date)):
Expand Down Expand Up @@ -2467,6 +2469,11 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()):
while cur <= end:
yield cur

if cur == end:
# GH#24252 avoid overflows by not performing the addition
# in offset.apply unless we have to
break

# faster than cur + offset
next_date = offset.apply(cur)
if next_date <= cur:
Expand All @@ -2477,6 +2484,11 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()):
while cur >= end:
yield cur

if cur == end:
# GH#24252 avoid overflows by not performing the addition
# in offset.apply unless we have to
break

# faster than cur + offset
next_date = offset.apply(cur)
if next_date >= cur:
Expand Down