Skip to content

BUG/WIP: segfault manifesting with dateutil=2.6 w.r.t. replace when timezones are present #14631

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/requirements-3.5_OSX.pip
Original file line number Diff line number Diff line change
@@ -1 +1 @@
python-dateutil>=2.5.0
python-dateutil==2.5.3
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.19.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ Performance Improvements

Bug Fixes
~~~~~~~~~

- compatibility with ``dateutil==2.6.0`` in the test suite (:issue:`14621`)
- allow the ``nanosecond`` keyword in ``Timestamp.replace`` kwargs (:issue:`14621`)
1 change: 1 addition & 0 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def wrapper(self, other):
other = other.tz_localize(None)

result = func(self, other)

if self._adjust_dst:
result = tslib._localize_pydatetime(result, tz)

Expand Down
20 changes: 14 additions & 6 deletions pandas/tseries/tests/test_offsets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from distutils.version import LooseVersion
from datetime import date, datetime, timedelta
from dateutil.relativedelta import relativedelta
from pandas.compat import range, iteritems
Expand Down Expand Up @@ -4851,6 +4852,7 @@ def _test_all_offsets(self, n, **kwds):

def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset):
offset = DateOffset(**{offset_name: offset_n})

t = tstart + offset
if expected_utc_offset is not None:
self.assertTrue(get_utc_offset_hours(t) == expected_utc_offset)
Expand Down Expand Up @@ -4890,17 +4892,23 @@ def _make_timestamp(self, string, hrs_offset, tz):
return Timestamp(string + offset_string).tz_convert(tz)

def test_fallback_plural(self):
    """Test moving from daylight savings to standard time.

    For every zone in ``self.timezone_utc_offsets`` a start timestamp is
    built from ``self.ts_pre_fallback`` with the zone's daylight-saving UTC
    offset, and ``_test_all_offsets`` is invoked with ``n=3`` and the zone's
    standard-time UTC offset as ``expected_utc_offset``.

    Nothing is checked when ``dateutil==2.6.0`` is installed (GH 14621).
    """
    import dateutil

    # buggy ambiguous behavior in 2.6.0
    # GH 14621
    # https://github.com/dateutil/dateutil/issues/321
    # The version cannot change while the test runs, so decide once up
    # front instead of re-checking for every zone.
    if LooseVersion(dateutil.__version__) == LooseVersion('2.6.0'):
        return

    for tz, utc_offsets in self.timezone_utc_offsets.items():
        hrs_pre = utc_offsets['utc_offset_daylight']
        hrs_post = utc_offsets['utc_offset_standard']
        # Exactly one call per zone: an additional unguarded call here
        # would defeat the version guard above.
        self._test_all_offsets(
            n=3, tstart=self._make_timestamp(self.ts_pre_fallback,
                                             hrs_pre, tz),
            expected_utc_offset=hrs_post)

def test_springforward_plural(self):
"""test moving from standard to daylight savings"""
# test moving from standard to daylight savings
for tz, utc_offsets in self.timezone_utc_offsets.items():
hrs_pre = utc_offsets['utc_offset_standard']
hrs_post = utc_offsets['utc_offset_daylight']
Expand Down
89 changes: 86 additions & 3 deletions pandas/tseries/tests/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np
import pytz

from distutils.version import LooseVersion
from pandas.types.dtypes import DatetimeTZDtype
from pandas import (Index, Series, DataFrame, isnull, Timestamp)

Expand Down Expand Up @@ -518,8 +518,12 @@ def f():

times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H",
tz=tz, ambiguous='infer')
self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz))
self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz))
self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz,
freq="H"))
if dateutil.__version__ != LooseVersion('2.6.0'):
# GH 14621
self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz,
freq="H"))

def test_ambiguous_nat(self):
tz = self.tz('US/Eastern')
Expand Down Expand Up @@ -1163,6 +1167,85 @@ class TestTimeZones(tm.TestCase):
def setUp(self):
    """Fixture hook: delegate to ``tm._skip_if_no_pytz``.

    Presumably this skips the test when pytz is unavailable; the helper
    itself is defined outside this view.
    """
    tm._skip_if_no_pytz()

def test_replace(self):
# Exercises Timestamp.replace on a naive timestamp and on tz-aware
# timestamps built for each entry of ``self.timezones``.
# NOTE(review): indentation is not preserved in this view, so the extent
# of the ``for tz in self.timezones`` loop below cannot be confirmed here.
# GH 14621
# GH 7825
# replacing datetime components with and w/o presence of a timezone
dt = Timestamp('2016-01-01 09:00:00')
result = dt.replace(hour=0)
expected = Timestamp('2016-01-01 00:00:00')
self.assertEqual(result, expected)

for tz in self.timezones:
# same hour replacement, now on a tz-aware timestamp; the expected value
# is constructed in the same zone
dt = Timestamp('2016-01-01 09:00:00', tz=tz)
result = dt.replace(hour=0)
expected = Timestamp('2016-01-01 00:00:00', tz=tz)
self.assertEqual(result, expected)

# we preserve nanoseconds
# (the trailing 123 ns of the input must still be present after replacing
# only the hour)
dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
result = dt.replace(hour=0)
expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz)
self.assertEqual(result, expected)

# test all
# every date/time field is replaced in one call, including ``nanosecond``
dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
result = dt.replace(year=2015, month=2, day=2, hour=0, minute=5,
second=5, microsecond=5, nanosecond=5)
expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz)
self.assertEqual(result, expected)

# error
# an unknown keyword must raise ValueError
def f():
dt.replace(foo=5)
self.assertRaises(ValueError, f)

# a non-integer field value must raise ValueError as well
def f():
dt.replace(hour=0.1)
self.assertRaises(ValueError, f)

# assert conversion to naive is the same as replacing tzinfo with None
dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern')
self.assertEqual(dt.tz_localize(None), dt.replace(tzinfo=None))

def test_ambiguous_compat(self):
# Localizes the same ambiguous wall time (2013-10-27 01:00:00,
# Europe/London) through a pytz zone and a dateutil zone and checks that
# both give the same result for ambiguous=0 and ambiguous=1.
# NOTE(review): indentation is not preserved in this view, so which
# assertions belong to the version-guarded ``if`` bodies below should be
# confirmed against the real file.
# NOTE(review): ``dateutil`` must be bound at module scope for the version
# checks; that import is not visible in this hunk.
# validate that pytz and dateutil are compat for dst
# when the transition happens
tm._skip_if_no_dateutil()
tm._skip_if_no_pytz()

pytz_zone = 'Europe/London'
dateutil_zone = 'dateutil/Europe/London'
result_pytz = (Timestamp('2013-10-27 01:00:00')
.tz_localize(pytz_zone, ambiguous=0))
result_dateutil = (Timestamp('2013-10-27 01:00:00')
.tz_localize(dateutil_zone, ambiguous=0))
# both libraries must agree on the underlying epoch-nanosecond value;
# 1382835600000000000 ns corresponds to 2013-10-27 01:00:00 UTC
self.assertEqual(result_pytz.value, result_dateutil.value)
self.assertEqual(result_pytz.value, 1382835600000000000)

# dateutil 2.6 buggy w.r.t. ambiguous=0
# (tzname/str comparison is skipped only for exactly 2.6.0; the check
# compares the plain version string against a LooseVersion)
if dateutil.__version__ != LooseVersion('2.6.0'):
# GH 14621
# https://github.com/dateutil/dateutil/issues/321
self.assertEqual(result_pytz.to_pydatetime().tzname(),
result_dateutil.to_pydatetime().tzname())
self.assertEqual(str(result_pytz), str(result_dateutil))

# 1 hour difference
# with ambiguous=1 the expected value is 1382832000000000000 ns, i.e.
# 2013-10-27 00:00:00 UTC, 3600 s earlier than the ambiguous=0 result
result_pytz = (Timestamp('2013-10-27 01:00:00')
.tz_localize(pytz_zone, ambiguous=1))
result_dateutil = (Timestamp('2013-10-27 01:00:00')
.tz_localize(dateutil_zone, ambiguous=1))
self.assertEqual(result_pytz.value, result_dateutil.value)
self.assertEqual(result_pytz.value, 1382832000000000000)

# dateutil < 2.6 is buggy w.r.t. ambiguous timezones
# (so the str/tzname comparison is only made for versions newer than 2.5.3)
if dateutil.__version__ > LooseVersion('2.5.3'):
# GH 14621
self.assertEqual(str(result_pytz), str(result_dateutil))
self.assertEqual(result_pytz.to_pydatetime().tzname(),
result_dateutil.to_pydatetime().tzname())

def test_index_equals_with_tz(self):
left = date_range('1/1/2011', periods=100, freq='H', tz='utc')
right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern')
Expand Down
5 changes: 3 additions & 2 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,9 @@ def test_repr(self):

# dateutil zone change (only matters for repr)
import dateutil
if dateutil.__version__ >= LooseVersion(
'2.3') and dateutil.__version__ <= LooseVersion('2.4.0'):
if (dateutil.__version__ >= LooseVersion('2.3') and
(dateutil.__version__ <= LooseVersion('2.4.0') or
dateutil.__version__ >= LooseVersion('2.6.0'))):
timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern',
'dateutil/US/Pacific']
else:
Expand Down
95 changes: 80 additions & 15 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ except NameError: # py3
cdef inline object create_timestamp_from_ts(
int64_t value, pandas_datetimestruct dts,
object tz, object freq):
""" convenience routine to construct a Timestamp from its parts """
cdef _Timestamp ts_base
ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month,
dts.day, dts.hour, dts.min,
Expand All @@ -112,6 +113,7 @@ cdef inline object create_timestamp_from_ts(
cdef inline object create_datetime_from_ts(
        int64_t value, pandas_datetimestruct dts,
        object tz, object freq):
    """
    Build a ``datetime`` object from the broken-down fields of ``dts``.

    Only the year, month, day, hour, min, sec and us fields of ``dts``
    and ``tz`` are used; ``value`` and ``freq`` are accepted but not read
    by this routine.
    """
    return datetime(
        dts.year, dts.month, dts.day,
        dts.hour, dts.min, dts.sec, dts.us,
        tz)

Expand Down Expand Up @@ -378,7 +380,6 @@ class Timestamp(_Timestamp):
# Mixing pydatetime positional and keyword arguments is forbidden!

cdef _TSObject ts
cdef _Timestamp ts_base

if offset is not None:
# deprecate offset kwd in 0.19.0, GH13593
Expand Down Expand Up @@ -412,17 +413,7 @@ class Timestamp(_Timestamp):
from pandas.tseries.frequencies import to_offset
freq = to_offset(freq)

# make datetime happy
ts_base = _Timestamp.__new__(cls, ts.dts.year, ts.dts.month,
ts.dts.day, ts.dts.hour, ts.dts.min,
ts.dts.sec, ts.dts.us, ts.tzinfo)

# fill out rest of data
ts_base.value = ts.value
ts_base.freq = freq
ts_base.nanosecond = ts.dts.ps / 1000

return ts_base
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)

def _round(self, freq, rounder):

Expand Down Expand Up @@ -660,8 +651,80 @@ class Timestamp(_Timestamp):
astimezone = tz_convert

def replace(self, **kwds):
    """
    implements datetime.replace, handles nanoseconds

    Parameters
    ----------
    kwargs: key-value dict

    accepted keywords are:
    year, month, day, hour, minute, second, microsecond, nanosecond, tzinfo

    values must be integer, or for tzinfo, a tz-convertible

    Returns
    -------
    Timestamp with fields replaced

    Raises
    ------
    ValueError
        if an unknown keyword is passed, or if the value for any keyword
        other than tzinfo is not an integer
    """

    cdef:
        pandas_datetimestruct dts
        int64_t value
        object tzinfo, k, v

    # local import: numbers.Integral also covers Python 2 ``long`` and
    # numpy integer scalars, which a bare ``isinstance(v, int)`` rejects
    import numbers

    # set to naive if needed
    tzinfo = self.tzinfo
    value = self.value
    if tzinfo is not None:
        value = tz_convert_single(value, 'UTC', tzinfo)

    # setup components
    pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts)
    dts.ps = self.nanosecond * 1000

    # replace
    def validate(k, v):
        """ validate integers """
        if not isinstance(v, numbers.Integral):
            raise ValueError("value must be an integer, received "
                             "{v} for {k}".format(v=type(v), k=k))
        return v

    for k, v in kwds.items():
        if k == 'year':
            dts.year = validate(k, v)
        elif k == 'month':
            dts.month = validate(k, v)
        elif k == 'day':
            dts.day = validate(k, v)
        elif k == 'hour':
            dts.hour = validate(k, v)
        elif k == 'minute':
            dts.min = validate(k, v)
        elif k == 'second':
            dts.sec = validate(k, v)
        elif k == 'microsecond':
            dts.us = validate(k, v)
        elif k == 'nanosecond':
            # nanoseconds are carried in the picosecond slot of the struct
            dts.ps = validate(k, v) * 1000
        elif k == 'tzinfo':
            # not validated as an integer; applied below when converting
            # the replaced fields back
            tzinfo = v
        else:
            raise ValueError("invalid name {} passed".format(k))

    # reconstruct & check bounds
    value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
    if value != NPY_NAT:
        _check_dts_bounds(&dts)

    # set tz if needed
    if tzinfo is not None:
        value = tz_convert_single(value, tzinfo, 'UTC')

    return create_timestamp_from_ts(value, dts, tzinfo, self.freq)

def isoformat(self, sep='T'):
base = super(_Timestamp, self).isoformat(sep=sep)
Expand Down Expand Up @@ -5041,7 +5104,9 @@ cpdef normalize_date(object dt):
-------
normalized : datetime.datetime or Timestamp
"""
if PyDateTime_Check(dt):
if is_timestamp(dt):
return dt.replace(hour=0, minute=0, second=0, microsecond=0, nanosecond=0)
elif PyDateTime_Check(dt):
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
elif PyDate_Check(dt):
return datetime(dt.year, dt.month, dt.day)
Expand Down