@@ -1652,7 +1652,8 @@ def _generate_regular_range(cls, start, end, periods, freq):
1652
1652
return data
1653
1653
1654
1654
1655
def _generate_range_overflow_safe(endpoint, periods, stride,
                                  side='start'):
    """
    Calculate the second endpoint for passing to np.arange, checking
    to avoid an integer overflow.  Overflows are re-raised as
    OutOfBoundsDatetime instead of silently wrapping around.

    Parameters
    ----------
    endpoint : int
        The known endpoint of the desired range.
    periods : int
        Number of periods in the desired range.
    stride : int
        Step between consecutive periods.
    side : {'start', 'end'}, default 'start'
        Which end of the range `endpoint` refers to.

    Returns
    -------
    other_end : int

    Raises
    ------
    OutOfBoundsDatetime
        If `periods * abs(stride)` does not fit in uint64, or if the
        requested range is certain to exceed the int64 bounds.
    """
    # GH#14187 raise instead of incorrectly wrapping around
    assert side in ['start', 'end']

    # Kept as a plain Python int so every comparison below is exact.
    i64max = np.iinfo(np.int64).max
    msg = ('Cannot generate range with {side}={endpoint} and '
           'periods={periods}'
           .format(side=side, endpoint=endpoint, periods=periods))

    with np.errstate(over="raise"):
        # if periods * strides cannot be multiplied within the *uint64*
        # bounds, we cannot salvage the operation by recursing, so raise
        try:
            addend = np.uint64(periods) * np.uint64(np.abs(stride))
        except FloatingPointError:
            raise tslib.OutOfBoundsDatetime(msg)

    # Convert to a Python int before comparing: a mixed uint64/int
    # comparison may be routed through float64, where 2**63 - 1 and 2**63
    # are indistinguishable.
    if int(addend) <= i64max:
        # relatively easy case without casting concerns
        return _generate_range_overflow_safe_signed(
            endpoint, periods, stride, side)

    elif stride > 0 and ((endpoint > 0 and side == 'start') or
                         (endpoint < 0 and side == 'end')):
        # no chance of not-overflowing: we are moving further away from
        # zero by more than the whole int64 range.  With a negative stride
        # the range moves back towards zero and may still be representable,
        # so that case is left to the splitting logic below.
        raise tslib.OutOfBoundsDatetime(msg)

    elif (side == 'end' and endpoint > i64max and
          endpoint - stride <= i64max):
        # in _generate_regular_range we added `stride` thereby overflowing
        # the bounds.  Adjust to fix this.
        return _generate_range_overflow_safe(endpoint - stride,
                                             periods - 1, stride, side)

    # split into smaller pieces
    return _generate_range_recurse(endpoint, periods, stride, side)
1711
+
1712
+
1713
+ def _generate_range_overflow_safe_signed (endpoint , periods , stride , side ):
1714
+ """
1715
+ A special case for _generate_range_overflow_safe where `periods * stride`
1716
+ can be calculated without overflowing int64 bounds.
1717
+ """
1718
+ assert side in ['start' , 'end' ]
1678
1719
if side == 'end' :
1679
1720
stride *= - 1
1680
1721
1722
+ with np .errstate (over = "raise" ):
1723
+ addend = np .int64 (periods ) * np .int64 (stride )
1724
+ try :
1725
+ # easy case with no overflows
1726
+ return np .int64 (endpoint ) + addend
1727
+ except (FloatingPointError , OverflowError ):
1728
+ # with endpoint negative and addend positive we risk
1729
+ # FloatingPointError; with reversed signed we risk OverflowError
1730
+ pass
1731
+
1732
+ if stride > 0 :
1733
+ # watch out for very special case in which we just slightly
1734
+ # exceed implementation bounds, but when passing the result to
1735
+ # np.arange will get a result slightly within the bounds
1736
+ if endpoint >= 0 :
1737
+ result = np .uint64 (endpoint ) + np .uint64 (addend )
1738
+ i64max = np .uint64 (np .iinfo (np .int64 ).max )
1739
+ if result <= i64max + np .uint64 (stride ):
1740
+ return result
1741
+ else :
1742
+ return _generate_range_recurse (endpoint , periods ,
1743
+ np .abs (stride ), side )
1744
+ elif stride < 0 and endpoint > 0 :
1745
+ return _generate_range_recurse (np .uint64 (endpoint ), periods ,
1746
+ np .abs (stride ), side )
1747
+
1681
1748
try :
1682
1749
other_end = checked_add_with_arr (np .int64 (endpoint ),
1683
- np . int64 ( periods ) * stride )
1750
+ addend )
1684
1751
except OverflowError :
1685
1752
raise tslib .OutOfBoundsDatetime ('Cannot generate range with '
1686
1753
'{side}={endpoint} and '
@@ -1690,6 +1757,33 @@ def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
1690
1757
return other_end
1691
1758
1692
1759
1760
+ def _generate_range_recurse (endpoint , periods , stride , side ):
1761
+ """
1762
+ Avoid problems in int64/uint64 mismatch by splitting range generation into
1763
+ smaller pieces.
1764
+
1765
+ Parameters
1766
+ ----------
1767
+ endpoint : int
1768
+ periods : int
1769
+ stride : int
1770
+ side : {'start', 'end'}
1771
+
1772
+ Returns
1773
+ -------
1774
+ other_end : int
1775
+ """
1776
+ # split into smaller pieces
1777
+ mid_periods = periods // 2
1778
+ remaining = periods - mid_periods
1779
+ assert 0 < remaining < periods , (remaining , periods , endpoint , stride )
1780
+ print (periods , mid_periods , endpoint , stride , side )
1781
+
1782
+ midpoint = _generate_range_overflow_safe (endpoint , mid_periods ,
1783
+ stride , side )
1784
+ return _generate_range_overflow_safe (midpoint , remaining , stride , side )
1785
+
1786
+
1693
1787
def _infer_tz_from_endpoints (start , end , tz ):
1694
1788
"""
1695
1789
If a timezone is not explicitly given via `tz`, see if one can
0 commit comments