From ce349d1e887213c18eb469ab4ce86597b5f72bd2 Mon Sep 17 00:00:00 2001
From: Ka Wo Chen
Date: Sat, 22 Aug 2015 10:46:00 -0400
Subject: [PATCH] BUG: GH10885 where an edge case in date_range produces an extra timestamp

---
 doc/source/whatsnew/v0.17.0.txt        |  1 +
 pandas/tseries/index.py                |  6 ++++--
 pandas/tseries/tests/test_daterange.py | 12 ++++++++++++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index d30b7875e44b7..cc8f135eb62b0 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -781,6 +781,7 @@ Bug Fixes
 - Bug in ``DatetimeIndex.take`` and ``TimedeltaIndex.take`` may not raise ``IndexError`` against invalid index (:issue:`10295`)
 - Bug in ``Series([np.nan]).astype('M8[ms]')``, which now returns ``Series([pd.NaT])`` (:issue:`10747`)
 - Bug in ``PeriodIndex.order`` reset freq (:issue:`10295`)
+- Bug in ``date_range`` producing an extra timestamp when ``freq`` divides ``end`` as nanos (:issue:`10885`)
 - Bug in ``iloc`` allowing memory outside bounds of a Series to be accessed with negative integers (:issue:`10779`)
 - Bug in ``read_msgpack`` where encoding is not respected (:issue:`10580`)
 - Bug preventing access to the first index when using ``iloc`` with a list containing the appropriate negative integer (:issue:`10547`, :issue:`10779`)
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index 0525a29ef3fd0..c6c66a62b86b5 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -1,4 +1,5 @@
 # pylint: disable=E1101
+from __future__ import division
 import operator
 import warnings
 from datetime import time, datetime
@@ -1793,8 +1794,9 @@ def _generate_regular_range(start, end, periods, offset):
         stride = offset.nanos
         if periods is None:
             b = Timestamp(start).value
-            e = Timestamp(end).value
-            e += stride - e % stride
+            # cannot just use e = Timestamp(end) + 1 because arange breaks when
+            # stride is too large, see GH10887
+            e = b + (Timestamp(end).value - b)//stride * stride + stride//2
             # end.tz == start.tz by this point due to _generate implementation
             tz = start.tz
         elif start is not None:
diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py
index 86e0f7162c545..42136c3433977 100644
--- a/pandas/tseries/tests/test_daterange.py
+++ b/pandas/tseries/tests/test_daterange.py
@@ -490,6 +490,18 @@ def test_years_only(self):
         self.assertEqual(dr[0], datetime(2014, 1, 31))
         self.assertEqual(dr[-1], datetime(2014, 12, 31))
 
+    def test_freq_divides_end_in_nanos(self):
+        # GH 10885
+        result_1 = date_range('2005-01-12 10:00', '2005-01-12 16:00',
+                              freq='345min')
+        result_2 = date_range('2005-01-13 10:00', '2005-01-13 16:00',
+                              freq='345min')
+        expected_1 = DatetimeIndex(['2005-01-12 10:00:00', '2005-01-12 15:45:00'],
+                                   dtype='datetime64[ns]', freq='345T', tz=None)
+        expected_2 = DatetimeIndex(['2005-01-13 10:00:00', '2005-01-13 15:45:00'],
+                                   dtype='datetime64[ns]', freq='345T', tz=None)
+        self.assertTrue(result_1.equals(expected_1))
+        self.assertTrue(result_2.equals(expected_2))
 
 class TestCustomDateRange(tm.TestCase):
 