From e200f32a4f26914298efb4aba5573d34705171fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 4 Jun 2019 16:04:44 -0500 Subject: [PATCH 1/8] BUG: avoid overflow in Bday generate_range, closes #24252 --- pandas/tests/indexes/datetimes/test_date_range.py | 7 +++++++ pandas/tseries/offsets.py | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 7f03793d880b0..93e43c26a76a1 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -740,6 +740,13 @@ def test_bdays_and_open_boundaries(self, closed): expected = pd.date_range(bday_start, bday_end, freq='D') tm.assert_index_equal(result, expected) + def test_bday_near_overflow(self): + # GH#24252 avoid doing unnecessary addition that _would_ overflow + start = pd.Timestamp.max.floor("D").to_pydatetime() + rng = pd.date_range(start, end=None, periods=1, freq='B') + expected = pd.DatetimeIndex([start], freq='B') + tm.assert_index_equal(rng, expected) + class TestCustomDateRange: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index c1764b3845fce..b0d2a7dd86f33 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2467,6 +2467,11 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()): while cur <= end: yield cur + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + # faster than cur + offset next_date = offset.apply(cur) if next_date <= cur: @@ -2477,6 +2482,11 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()): while cur >= end: yield cur + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + # faster than cur + offset next_date = offset.apply(cur) if next_date >= cur: From d387afeb833ca1f6166ef282089634a35404cf99 Mon Sep 
17 00:00:00 2001 From: jbrockmendel Date: Tue, 4 Jun 2019 19:21:32 -0500 Subject: [PATCH 2/8] update tests to check for OutOfBoundsDatetime instead of OverflowError --- pandas/_libs/tslibs/timestamps.pyx | 8 +++++++- pandas/tests/arithmetic/test_timedelta64.py | 9 +++++---- pandas/tests/indexes/datetimes/test_date_range.py | 6 ++++++ pandas/tests/scalar/timestamp/test_timestamp.py | 7 +++++++ pandas/tests/tseries/offsets/test_offsets.py | 2 +- pandas/tseries/offsets.py | 4 +++- 6 files changed, 29 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c8c6efda30fae..3ce445c8d1dd1 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -24,6 +24,7 @@ from pandas._libs.tslibs.conversion cimport ( from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, npy_datetimestruct, dt64_to_dtstruct) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.offsets cimport to_offset from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timezones cimport ( @@ -409,7 +410,12 @@ class Timestamp(_Timestamp): " tz parameter will raise in the future. 
Use" " tz_convert instead.", FutureWarning) - ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) + try: + ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) + except OverflowError: + # GH#26651 re-raise as OutOfBoundsDatetime + raise OutOfBoundsDatetime("Cannot convert {ts_input} to Timestamp" + .format(ts_input=ts_input)) if ts.value == NPY_NAT: return NaT diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index c214a880345ae..37c9cbbf29d70 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -5,7 +5,8 @@ import numpy as np import pytest -from pandas.errors import NullFrequencyError, PerformanceWarning +from pandas.errors import ( + NullFrequencyError, OutOfBoundsDatetime, PerformanceWarning) import pandas as pd from pandas import ( @@ -466,10 +467,10 @@ def test_tdi_add_timestamp_nat_masking(self): def test_tdi_add_overflow(self): # See GH#14068 - msg = "too (big|large) to convert" - with pytest.raises(OverflowError, match=msg): + msg = "Cannot convert" + with pytest.raises(OutOfBoundsDatetime, match=msg): pd.to_timedelta(106580, 'D') + Timestamp('2000') - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsDatetime, match=msg): Timestamp('2000') + pd.to_timedelta(106580, 'D') _NaT = int(pd.NaT) + 1 diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 93e43c26a76a1..1545cc52eb1f4 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -747,6 +747,12 @@ def test_bday_near_overflow(self): expected = pd.DatetimeIndex([start], freq='B') tm.assert_index_equal(rng, expected) + def test_bday_overflow_error(self): + # GH#24252 check that we get OutOfBoundsDatetime and not OverflowError + start = pd.Timestamp.max.floor("D").to_pydatetime() + with 
pytest.raises(OutOfBoundsDatetime): + pd.date_range(start, periods=2, freq='B') + class TestCustomDateRange: diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 773b4e6f21a19..4b6b0dac916c6 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -463,6 +463,13 @@ def test_invalid_date_kwarg_with_string_input(self, arg): with pytest.raises(ValueError): Timestamp('2010-10-10 12:59:59.999999999', **kwarg) + def test_out_of_bounds_integer_value(self): + # GH#26651 check that we raise OutOfBoundsDatetime, not OverflowError + with pytest.raises(OutOfBoundsDatetime): + Timestamp(Timestamp.max.value * 2) + with pytest.raises(OutOfBoundsDatetime): + Timestamp(Timestamp.min.value * 2) + def test_out_of_bounds_value(self): one_us = np.timedelta64(1).astype('timedelta64[us]') diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 8c8a2f75c4a47..a1ad792e57bde 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -115,7 +115,7 @@ def test_apply_out_of_range(self, tz_naive_fixture): assert t.tzinfo == result.tzinfo except OutOfBoundsDatetime: - raise + pass except (ValueError, KeyError): # we are creating an invalid offset # so ignore diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index b0d2a7dd86f33..00837d36d9508 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -97,6 +97,8 @@ def wrapper(self, other): if tz is not None and result.tzinfo is None: result = conversion.localize_pydatetime(result, tz) + result = Timestamp(result) + return result return wrapper @@ -2330,7 +2332,7 @@ def apply(self, other): # an exception, when we call using the + operator, # we directly call the known method result = other.__add__(self) - if result == NotImplemented: + if result is NotImplemented: raise OverflowError 
return result elif isinstance(other, (datetime, np.datetime64, date)): From 68ba8a502e65e743e403d2cbf9db24870162cd11 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 7 Jun 2019 11:02:24 -0500 Subject: [PATCH 3/8] Move checking to convert_to_tsobject --- pandas/_libs/tslibs/conversion.pyx | 9 +++++++++ pandas/_libs/tslibs/timestamps.pyx | 7 +------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 04bb4454462a7..28aa0550ad3d2 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -275,6 +275,10 @@ cdef convert_to_tsobject(object ts, object tz, object unit, - iso8601 string object - python datetime object - another timestamp object + + Raises + ------ + OutOfBoundsDatetime : ts cannot be converted within implementation bounds """ cdef: _TSObject obj @@ -294,6 +298,11 @@ cdef convert_to_tsobject(object ts, object tz, object unit, if obj.value != NPY_NAT: dt64_to_dtstruct(obj.value, &obj.dts) elif is_integer_object(ts): + try: + ts = <int64_t>ts + except OverflowError: + # GH#26651 re-raise as OutOfBoundsDatetime + raise OutOfBoundsDatetime if ts == NPY_NAT: obj.value = NPY_NAT else: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ce445c8d1dd1..520e98bb5bf76 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -410,7 +410,12 @@ class Timestamp(_Timestamp): " tz parameter will raise in the future. 
Use" " tz_convert instead.", FutureWarning) - try: - ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) - except OverflowError: - # GH#26651 re-raise as OutOfBoundsDatetime - raise OutOfBoundsDatetime("Cannot convert {ts_input} to Timestamp" - .format(ts_input=ts_input)) + ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) if ts.value == NPY_NAT: return NaT From 595f0ecb5dd5890c5082c35c8b225c6b0c3a689a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 10 Jun 2019 10:19:47 -0700 Subject: [PATCH 4/8] remove message from preliminary scalar analogue test --- pandas/tests/arithmetic/test_timedelta64.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index b6f358770b3a5..2dff9a6088de8 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -480,10 +480,10 @@ def test_tdi_add_timestamp_nat_masking(self): def test_tdi_add_overflow(self): # See GH#14068 - msg = "Cannot convert" - with pytest.raises(OutOfBoundsDatetime, match=msg): + # preliminary test scalar analogue of vectorized tests below + with pytest.raises(OutOfBoundsDatetime): pd.to_timedelta(106580, 'D') + Timestamp('2000') - with pytest.raises(OutOfBoundsDatetime, match=msg): + with pytest.raises(OutOfBoundsDatetime): Timestamp('2000') + pd.to_timedelta(106580, 'D') _NaT = int(pd.NaT) + 1 From bd792742fcbfb6adf0b028739eb40455f91413f3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 18 Jun 2019 13:34:52 -0700 Subject: [PATCH 5/8] whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 833a9b1c342df..2453d4ab24726 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -567,6 +567,7 @@ Datetimelike - Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, 
:class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``levels`` parameter was ignored. (:issue:`26675`) - Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'`` - Bug when comparing a :class:`PeriodIndex` against a zero-dimensional numpy array (:issue:`26689`) +- Bug in :func:`date_range` with unnecessary ``OverflowError`` being raised for very large or very small dates (:issue:`26651`) Timedelta ^^^^^^^^^ From 7a3f83e707166e9a59a0cd51f2115ace7ccdf33d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 18 Jun 2019 18:17:28 -0700 Subject: [PATCH 6/8] remove unused import --- pandas/_libs/tslibs/timestamps.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 520e98bb5bf76..c8c6efda30fae 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -24,7 +24,6 @@ from pandas._libs.tslibs.conversion cimport ( from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, npy_datetimestruct, dt64_to_dtstruct) -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.offsets cimport to_offset from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timezones cimport ( From fe7c96134a04fd9eea1ff571f7be7d3859ae34fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 19 Jun 2019 18:22:26 -0700 Subject: [PATCH 7/8] add missing import --- pandas/tests/frame/test_constructors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 68017786eb6a6..7dc74961a2adc 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -5,6 +5,7 @@ import numpy as np import numpy.ma as ma +import 
numpy.ma.mrecords as mrecords import pytest from pandas.compat import PY36, is_platform_little_endian @@ -839,7 +840,7 @@ def test_constructor_maskedrecarray_dtype(self): data = np.ma.array( np.ma.zeros(5, dtype=[('date', ' Date: Thu, 20 Jun 2019 07:12:47 -0700 Subject: [PATCH 8/8] use ts in exception message --- pandas/_libs/tslibs/conversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 28aa0550ad3d2..0a3f4ed3cc91d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -302,7 +302,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, ts = <int64_t>ts except OverflowError: # GH#26651 re-raise as OutOfBoundsDatetime - raise OutOfBoundsDatetime + raise OutOfBoundsDatetime(ts) if ts == NPY_NAT: obj.value = NPY_NAT else: