Skip to content

Commit f531dfb

Browse files
committed
Fix overflow bugs in date_range
1 parent 1d3ed91 commit f531dfb

File tree

2 files changed

+121
-2
lines changed

2 files changed

+121
-2
lines changed

pandas/core/arrays/datetimes.py

Lines changed: 96 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1652,7 +1652,8 @@ def _generate_regular_range(cls, start, end, periods, freq):
16521652
return data
16531653

16541654

1655-
def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
1655+
def _generate_range_overflow_safe(endpoint, periods, stride,
1656+
side='start'):
16561657
"""
16571658
Calculate the second endpoint for passing to np.arange, checking
16581659
to avoid an integer overflow. Catch OverflowError and re-raise
@@ -1675,12 +1676,78 @@ def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
16751676
"""
16761677
# GH#14187 raise instead of incorrectly wrapping around
16771678
assert side in ['start', 'end']
1679+
1680+
i64max = np.iinfo(np.int64).max
1681+
msg = ('Cannot generate range with {side}={endpoint} and '
1682+
'periods={periods}'
1683+
.format(side=side, endpoint=endpoint, periods=periods))
1684+
1685+
with np.errstate(over="raise"):
1686+
# if `periods * stride` cannot be computed within the *uint64* bounds,
1687+
# we cannot salvage the operation by recursing, so raise
1688+
try:
1689+
addend = np.uint64(periods) * np.uint64(np.abs(stride))
1690+
except FloatingPointError:
1691+
raise tslib.OutOfBoundsDatetime(msg)
1692+
1693+
if np.abs(addend) <= i64max:
1694+
# relatively easy case without casting concerns
1695+
return _generate_range_overflow_safe_signed(
1696+
endpoint, periods, stride, side)
1697+
1698+
elif ((endpoint > 0 and side == 'start') or
1699+
(endpoint < 0 and side == 'end')):
1700+
# no chance of not-overflowing
1701+
raise tslib.OutOfBoundsDatetime(msg)
1702+
1703+
elif (side == 'end' and endpoint > i64max and endpoint - stride <= i64max):
1704+
# in _generate_regular_range we added `stride` thereby overflowing
1705+
# the bounds. Adjust to fix this.
1706+
return _generate_range_overflow_safe(endpoint - stride,
1707+
periods - 1, stride, side)
1708+
1709+
# split into smaller pieces
1710+
return _generate_range_recurse(endpoint, periods, stride, side)
1711+
1712+
1713+
def _generate_range_overflow_safe_signed(endpoint, periods, stride, side):
1714+
"""
1715+
A special case for _generate_range_overflow_safe where `periods * stride`
1716+
can be calculated without overflowing int64 bounds.
1717+
"""
1718+
assert side in ['start', 'end']
16781719
if side == 'end':
16791720
stride *= -1
16801721

1722+
with np.errstate(over="raise"):
1723+
addend = np.int64(periods) * np.int64(stride)
1724+
try:
1725+
# easy case with no overflows
1726+
return np.int64(endpoint) + addend
1727+
except (FloatingPointError, OverflowError):
1728+
# with endpoint negative and addend positive we risk
1729+
# FloatingPointError; with reversed signs we risk OverflowError
1730+
pass
1731+
1732+
if stride > 0:
1733+
# watch out for very special case in which we just slightly
1734+
# exceed implementation bounds, but when passing the result to
1735+
# np.arange will get a result slightly within the bounds
1736+
if endpoint >= 0:
1737+
result = np.uint64(endpoint) + np.uint64(addend)
1738+
i64max = np.uint64(np.iinfo(np.int64).max)
1739+
if result <= i64max + np.uint64(stride):
1740+
return result
1741+
else:
1742+
return _generate_range_recurse(endpoint, periods,
1743+
np.abs(stride), side)
1744+
elif stride < 0 and endpoint > 0:
1745+
return _generate_range_recurse(np.uint64(endpoint), periods,
1746+
np.abs(stride), side)
1747+
16811748
try:
16821749
other_end = checked_add_with_arr(np.int64(endpoint),
1683-
np.int64(periods) * stride)
1750+
addend)
16841751
except OverflowError:
16851752
raise tslib.OutOfBoundsDatetime('Cannot generate range with '
16861753
'{side}={endpoint} and '
@@ -1690,6 +1757,33 @@ def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
16901757
return other_end
16911758

16921759

1760+
def _generate_range_recurse(endpoint, periods, stride, side):
    """
    Avoid problems in int64/uint64 mismatch by splitting range generation into
    smaller pieces.

    The requested number of periods is cut into two parts. The far end of the
    first part is computed with `_generate_range_overflow_safe` and is then
    used as the endpoint from which the second part is computed.

    Parameters
    ----------
    endpoint : int
    periods : int
    stride : int
    side : {'start', 'end'}

    Returns
    -------
    other_end : int
    """
    # split into smaller pieces
    mid_periods = periods // 2
    remaining = periods - mid_periods
    # both pieces must be non-empty and strictly smaller than the whole,
    # otherwise handing them back to _generate_range_overflow_safe would not
    # make any progress
    assert 0 < remaining < periods, (remaining, periods, endpoint, stride)

    midpoint = _generate_range_overflow_safe(endpoint, mid_periods,
                                             stride, side)
    return _generate_range_overflow_safe(midpoint, remaining, stride, side)
1785+
1786+
16931787
def _infer_tz_from_endpoints(start, end, tz):
16941788
"""
16951789
If a timezone is not explicitly given via `tz`, see if one can

pandas/tests/indexes/datetimes/test_date_range.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,31 @@ def test_date_range_timestamp_equiv_preserve_frequency(self):
8080

8181

8282
class TestDateRanges(TestData):
83+
def test_date_range_multiplication_overflow(self):
    """
    Overflows in calculating `addend = periods * stride` must be caught.
    """
    first_day = '1677-09-22'
    n_periods = 213503

    # building this range must not emit an overflow RuntimeWarning
    with tm.assert_produces_warning(None):
        rng = date_range(start=first_day, periods=n_periods, freq='D')

    assert rng[0] == Timestamp(first_day)
    assert len(rng) == n_periods

    # this request must raise OutOfBoundsDatetime with the expected message
    expected_msg = "Cannot generate range with"
    with pytest.raises(OutOfBoundsDatetime, match=expected_msg):
        date_range('1969-05-04', periods=200000000, freq='30000D')
96+
97+
def test_date_range_unsigned_overflow_handling(self):
    """
    Cover the case where `addend = periods * stride` overflows int64 bounds
    but not uint64 bounds.
    """
    expected = date_range(start='1677-09-22', end='2262-04-11', freq='D')
    n_periods = len(expected)

    # rebuild the same range anchored at its first element
    from_start = date_range(start=expected[0], periods=n_periods, freq='D')
    assert from_start.equals(expected)

    # and once more anchored at its last element
    from_end = date_range(end=expected[-1], periods=n_periods, freq='D')
    assert from_end.equals(expected)
107+
83108
def test_date_range_out_of_bounds(self):
84109
# GH#14187
85110
with pytest.raises(OutOfBoundsDatetime):

0 commit comments

Comments
 (0)