diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index aed54ab0f5040..0303b41e42e55 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -12,6 +12,8 @@ randint = np.random.randint np.set_printoptions(precision=4, suppress=True) options.display.max_rows=15 + import dateutil + import pytz from dateutil.relativedelta import relativedelta from pandas.tseries.api import * from pandas.tseries.offsets import * @@ -1266,32 +1268,37 @@ common zones, the names are the same as ``pytz``. .. ipython:: python # pytz - rng_utc = date_range('3/6/2012 00:00', periods=10, freq='D', tz='UTC') - rng_utc.tz + rng_pytz = date_range('3/6/2012 00:00', periods=10, freq='D', + tz='Europe/London') + rng_pytz.tz # dateutil - rng_utc_dateutil = date_range('3/6/2012 00:00', periods=10, freq='D', - tz='dateutil/UTC') - rng_utc_dateutil.tz + rng_dateutil = date_range('3/6/2012 00:00', periods=10, freq='D', + tz='dateutil/Europe/London') + rng_dateutil.tz -You can also construct the timezone explicitly first, which gives you more control over which -time zone is used: + # dateutil - utc special case + rng_utc = date_range('3/6/2012 00:00', periods=10, freq='D', + tz=dateutil.tz.tzutc()) + rng_utc.tz + +Note that the ``UTC`` timezone is a special case in ``dateutil`` and should be constructed explicitly +as an instance of ``dateutil.tz.tzutc``. You can also construct other timezones explicitly first, +which gives you more control over which time zone is used: .. 
ipython:: python # pytz - import pytz - tz_pytz = pytz.timezone('UTC') - rng_utc = date_range('3/6/2012 00:00', periods=10, freq='D', tz=tz_pytz) - rng_utc.tz + tz_pytz = pytz.timezone('Europe/London') + rng_pytz = date_range('3/6/2012 00:00', periods=10, freq='D', + tz=tz_pytz) + rng_pytz.tz == tz_pytz # dateutil - import dateutil - tz_dateutil = dateutil.tz.gettz('UTC') - rng_utc_dateutil = date_range('3/6/2012 00:00', periods=10, freq='D', - tz=tz_dateutil) - rng_utc_dateutil.tz - + tz_dateutil = dateutil.tz.gettz('Europe/London') + rng_dateutil = date_range('3/6/2012 00:00', periods=10, freq='D', + tz=tz_dateutil) + rng_dateutil.tz == tz_dateutil Timestamps, like Python's ``datetime.datetime`` object can be either time zone naive or time zone aware. Naive time series and DatetimeIndex objects can be @@ -1313,9 +1320,10 @@ tz-aware data to another time zone: ts_utc.tz_convert('US/Eastern') .. warning:: - Be very wary of conversions between libraries as ``pytz`` and ``dateutil`` - may have different definitions of the time zones. This is more of a problem for - unusual timezones than for 'standard' zones like ``US/Eastern``. + + Be wary of conversions between libraries. For some zones ``pytz`` and ``dateutil`` have different + definitions of the zone. This is more of a problem for unusual timezones than for + 'standard' zones like ``US/Eastern``. Under the hood, all timestamps are stored in UTC. Scalar values from a ``DatetimeIndex`` with a time zone will have their fields (day, hour, minute) @@ -1359,8 +1367,6 @@ TimeSeries, aligning the data on the UTC timestamps: result result.index -.. _timeseries.timedeltas: - In some cases, localize cannot determine the DST and non-DST hours when there are duplicates. This often happens when reading files that simply duplicate the hours. The infer_dst argument in tz_localize will attempt @@ -1376,6 +1382,8 @@ to determine the right offset. 
rng_hourly_eastern = rng_hourly.tz_localize('US/Eastern', infer_dst=True) rng_hourly_eastern.values +.. _timeseries.timedeltas: + Time Deltas ----------- diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 586e47ff4f303..d38565008640f 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -104,11 +104,9 @@ Enhancements .. ipython:: python - rng_utc_dateutil = date_range('3/6/2012 00:00', - periods=10, - freq='D', - tz='dateutil/UTC') - rng_utc_dateutil.tz + rng = date_range('3/6/2012 00:00', periods=10, freq='D', + tz='dateutil/Europe/London') + rng.tz See :ref:`the docs <timeseries.timezone>`. diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 302b8ca9983e0..456d331156011 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -2946,7 +2946,7 @@ def test_tz_pytz(self): def test_tz_dateutil(self): _skip_if_no_dateutil() import dateutil - utc = dateutil.tz.gettz('UTC') + utc = dateutil.tz.tzutc() dt_date = datetime(2013, 1, 2, tzinfo=utc) self.assertEqual(str(dt_date), str(Timestamp(dt_date))) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 31e5363dd5abe..3881ed5277b85 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4630,7 +4630,8 @@ def test_getitem_setitem_datetime_tz_pytz(self): def test_getitem_setitem_datetime_tz_dateutil(self): _skip_if_no_dateutil(); - from dateutil.tz import gettz as tz + from dateutil.tz import gettz, tzutc + tz = lambda x: tzutc() if x == 'UTC' else gettz(x) # handle special case for utc in dateutil from pandas import date_range N = 50 diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index dd84ee27caf0e..0a732ac7bc7e8 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -2,7 +2,7 @@ from pandas.compat import range import pickle import nose - +import sys import numpy as np from pandas.core.index import Index @@ -36,6 +36,11 @@ def
_skip_if_no_cday(): raise nose.SkipTest("CustomBusinessDay not available.") +def _skip_if_windows_python_3(): + if sys.version_info > (3,) and sys.platform == 'win32': + raise nose.SkipTest("not used on python 3/win32") + + def eq_gen_range(kwargs, expected): rng = generate_range(**kwargs) assert(np.array_equal(list(rng), expected)) @@ -300,7 +305,7 @@ def test_summary_pytz(self): def test_summary_dateutil(self): _skip_if_no_dateutil() import dateutil - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.gettz('UTC')).summary() + bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() def test_misc(self): end = datetime(2009, 5, 13) @@ -391,8 +396,10 @@ def test_range_tz_pytz(self): def test_range_tz_dateutil(self): # GH 2906 _skip_if_no_dateutil() - from dateutil.tz import gettz as tz - + # Use maybe_get_tz to fix filename in tz under dateutil. + from pandas.tslib import maybe_get_tz + tz = lambda x: maybe_get_tz('dateutil/' + x) + start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) @@ -428,6 +435,7 @@ def test_month_range_union_tz_pytz(self): early_dr.union(late_dr) def test_month_range_union_tz_dateutil(self): + _skip_if_windows_python_3() _skip_if_no_dateutil() from dateutil.tz import gettz as timezone tz = timezone('US/Eastern') @@ -633,7 +641,7 @@ def test_summary_pytz(self): def test_summary_dateutil(self): _skip_if_no_dateutil() import dateutil - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.gettz('UTC')).summary() + cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() def test_misc(self): end = datetime(2009, 5, 13) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 81387c3736481..38887ede2faca 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -85,7 +85,8 @@ def test_timestamp_tz_arg(self): def test_timestamp_tz_arg_dateutil(self): import dateutil - p = Period('1/1/2005', 
freq='M').to_timestamp(tz=dateutil.tz.gettz('Europe/Brussels')) + from pandas.tslib import maybe_get_tz + p = Period('1/1/2005', freq='M').to_timestamp(tz=maybe_get_tz('dateutil/Europe/Brussels')) self.assertEqual(p.tz, dateutil.tz.gettz('Europe/Brussels')) def test_timestamp_tz_arg_dateutil_from_string(self): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 83cc5dcc7485f..04210b4f0c88f 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -58,6 +58,15 @@ def _skip_if_has_locale(): lang, _ = locale.getlocale() if lang is not None: raise nose.SkipTest("Specific locale is set {0}".format(lang)) + +def _skip_if_windows_python_3(): + if sys.version_info > (3,) and sys.platform == 'win32': + raise nose.SkipTest("not used on python 3/win32") + +def _skip_if_not_windows_python_3(): + if sys.version_info < (3,) or sys.platform != 'win32': + raise nose.SkipTest("only run on python 3/win32") + class TestTimeSeriesDuplicates(tm.TestCase): _multiprocess_can_split_ = True @@ -406,6 +415,16 @@ def test_timestamp_to_datetime(self): self.assertEqual(stamp, dtval) self.assertEqual(stamp.tzinfo, dtval.tzinfo) + def test_timestamp_to_datetime_dateutil(self): + _skip_if_no_pytz() + rng = date_range('20090415', '20090519', + tz='dateutil/US/Eastern') + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + def test_timestamp_to_datetime_explicit_pytz(self): _skip_if_no_pytz() import pytz @@ -418,6 +437,7 @@ def test_timestamp_to_datetime_explicit_pytz(self): self.assertEquals(stamp.tzinfo, dtval.tzinfo) def test_timestamp_to_datetime_explicit_dateutil(self): + _skip_if_windows_python_3() _skip_if_no_dateutil() import dateutil rng = date_range('20090415', '20090519', @@ -467,7 +487,7 @@ def _check_rng(rng): _check_rng(rng_eastern) _check_rng(rng_utc) - def 
test_index_convert_to_datetime_array_explicit_dateutil(self): + def test_index_convert_to_datetime_array_dateutil(self): _skip_if_no_dateutil() import dateutil @@ -480,8 +500,8 @@ def _check_rng(rng): self.assertEquals(x.tzinfo, stamp.tzinfo) rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', tz=dateutil.tz.gettz('US/Eastern')) - rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.gettz('UTC')) + rng_eastern = date_range('20090415', '20090519', tz='dateutil/US/Eastern') + rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) _check_rng(rng) _check_rng(rng_eastern) @@ -1560,14 +1580,14 @@ def test_to_period_tz_explicit_pytz(self): self.assert_(result == expected) self.assert_(ts.to_period().equals(xp)) - def test_to_period_tz_explicit_dateutil(self): + def test_to_period_tz_dateutil(self): _skip_if_no_dateutil() import dateutil from dateutil.tz import tzlocal xp = date_range('1/1/2000', '4/1/2000').to_period() - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.gettz('US/Eastern')) + ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') result = ts.to_period()[0] expected = ts[0].to_period() @@ -1575,7 +1595,7 @@ def test_to_period_tz_explicit_dateutil(self): self.assert_(result == expected) self.assert_(ts.to_period().equals(xp)) - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.gettz('UTC')) + ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) result = ts.to_period()[0] expected = ts[0].to_period() @@ -1793,17 +1813,17 @@ def test_append_concat_tz_explicit_pytz(self): appended = rng.append(rng2) self.assert_(appended.equals(rng3)) - def test_append_concat_tz_explicit_dateutil(self): + def test_append_concat_tz_dateutil(self): # GH 2938 _skip_if_no_dateutil() from dateutil.tz import gettz as timezone rng = date_range('5/8/2012 1:45', periods=10, freq='5T', - tz=timezone('US/Eastern')) + tz='dateutil/US/Eastern') rng2 = date_range('5/8/2012 2:35', periods=10, 
freq='5T', - tz=timezone('US/Eastern')) + tz='dateutil/US/Eastern') rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', - tz=timezone('US/Eastern')) + tz='dateutil/US/Eastern') ts = Series(np.random.randn(len(rng)), rng) df = DataFrame(np.random.randn(len(rng), 4), index=rng) ts2 = Series(np.random.randn(len(rng2)), rng2) @@ -2021,11 +2041,11 @@ def test_period_resample_with_local_timezone_dateutil(self): _skip_if_no_dateutil() import dateutil - local_timezone = dateutil.tz.gettz('America/Los_Angeles') + local_timezone = 'dateutil/America/Los_Angeles' - start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.gettz('UTC')) + start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()) # 1 day later - end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.gettz('UTC')) + end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()) index = pd.date_range(start, end, freq='H') @@ -2990,13 +3010,13 @@ def compare(x, y): def test_class_ops_dateutil(self): _skip_if_no_dateutil() - from dateutil.tz import gettz as timezone + from dateutil.tz import tzutc def compare(x,y): self.assertEqual(int(np.round(Timestamp(x).value/1e9)), int(np.round(Timestamp(y).value/1e9))) compare(Timestamp.now(),datetime.now()) - compare(Timestamp.now('UTC'), datetime.now(timezone('UTC'))) + compare(Timestamp.now('UTC'), datetime.now(tzutc())) compare(Timestamp.utcnow(),datetime.utcnow()) compare(Timestamp.today(),datetime.today()) @@ -3149,8 +3169,8 @@ def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): def test_cant_compare_tz_naive_w_aware_dateutil(self): _skip_if_no_dateutil() - from dateutil.tz import gettz - utc = gettz('UTC') + from dateutil.tz import tzutc + utc = tzutc() # #1404 a = Timestamp('3/12/2012') b = Timestamp('3/12/2012', tz=utc) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 5fb1f9db620ae..51c533df863e6 
100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -770,8 +770,12 @@ def setUp(self): _skip_if_no_dateutil() def tz(self, tz): - ''' Construct a timezone object from a string. Overridden in subclass to parameterize tests. ''' - return dateutil.tz.gettz(tz) + ''' + Construct a dateutil timezone. + Use tslib.maybe_get_tz so that we get the filename on the tz right + on windows. See #7337. + ''' + return tslib.maybe_get_tz('dateutil/' + tz) def tzstr(self, tz): ''' Construct a timezone string from a string. Overridden in subclass to parameterize tests. ''' @@ -784,6 +788,19 @@ def cmptz(self, tz1, tz2): def localize(self, tz, x): return x.replace(tzinfo=tz) + def test_utc_with_system_utc(self): + from pandas.tslib import maybe_get_tz + + # from system utc to real utc + ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) + # check that the time hasn't changed. + self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc())) + + # from system utc to real utc + ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) + # check that the time hasn't changed. + self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc())) + class TestTimeZones(tm.TestCase): _multiprocess_can_split_ = True diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 9f1db62a54bf3..62e3b120c9d64 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1029,6 +1029,10 @@ cdef inline object _get_zone(object tz): return 'UTC' else: if _treat_tz_as_dateutil(tz): + if '.tar.gz' in tz._filename: + raise ValueError('Bad tz filename. Dateutil on python 3 on windows has a bug which causes tzfile._filename to be the same for all ' + 'timezone files. Please construct dateutil timezones implicitly by passing a string like "dateutil/Europe/London" ' + 'when you construct your pandas objects instead of passing a timezone object. 
See https://github.com/pydata/pandas/pull/7362') return 'dateutil/' + tz._filename else: # tz is a pytz timezone or unknown. @@ -1048,7 +1052,11 @@ cpdef inline object maybe_get_tz(object tz): ''' if isinstance(tz, string_types): if tz.startswith('dateutil/'): + zone = tz[9:] tz = _dateutil_gettz(tz[9:]) + # On Python 3 on Windows, the filename is not always set correctly. + if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: + tz._filename = zone else: tz = pytz.timezone(tz) return tz @@ -1965,6 +1973,10 @@ cdef inline object _tz_cache_key(object tz): if isinstance(tz, _pytz_BaseTzInfo): return tz.zone elif isinstance(tz, _dateutil_tzfile): + if '.tar.gz' in tz._filename: + raise ValueError('Bad tz filename. Dateutil on python 3 on windows has a bug which causes tzfile._filename to be the same for all ' + 'timezone files. Please construct dateutil timezones implicitly by passing a string like "dateutil/Europe/London" ' + 'when you construct your pandas objects instead of passing a timezone object. See https://github.com/pydata/pandas/pull/7362') return tz._filename else: return None