diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 84fca37318091..a391d73b8922e 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -2351,9 +2351,11 @@ A DST transition may also shift the local time ahead by 1 hour creating nonexist local times. The behavior of localizing a timeseries with nonexistent times can be controlled by the ``nonexistent`` argument. The following options are available: -* ``raise``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) -* ``NaT``: Replaces nonexistent times with ``NaT`` -* ``shift``: Shifts nonexistent times forward to the closest real time +* ``'raise'``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) +* ``'NaT'``: Replaces nonexistent times with ``NaT`` +* ``'shift_forward'``: Shifts nonexistent times forward to the closest real time +* ``'shift_backward'``: Shifts nonexistent times backward to the closest real time +* timedelta object: Shifts nonexistent times by the timedelta duration .. ipython:: python @@ -2367,12 +2369,14 @@ Localization of nonexistent times will raise an error by default. In [2]: dti.tz_localize('Europe/Warsaw') NonExistentTimeError: 2015-03-29 02:30:00 -Transform nonexistent times to ``NaT`` or the closest real time forward in time. +Transform nonexistent times to ``NaT`` or shift the times. .. 
ipython:: python dti - dti.tz_localize('Europe/Warsaw', nonexistent='shift') + dti.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + dti.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + dti.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta(1, unit='H')) dti.tz_localize('Europe/Warsaw', nonexistent='NaT') diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 50ed85cb42c91..fbbbe51473e1c 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -407,7 +407,7 @@ Other Enhancements - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) - :func:`read_fwf` now accepts keyword ``infer_nrows`` (:issue:`15138`). - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) -- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`, :issue:`24466`) - :meth:`Index.difference` now has an optional ``sort`` parameter to specify whether the results should be sorted if possible (:issue:`17839`) - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) - :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. 
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6e4d79d7b6f9e..960311ea0aaec 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -13,7 +13,8 @@ from dateutil.tz import tzutc from datetime import time as datetime_time from cpython.datetime cimport (datetime, tzinfo, PyDateTime_Check, PyDate_Check, - PyDateTime_CheckExact, PyDateTime_IMPORT) + PyDateTime_CheckExact, PyDateTime_IMPORT, + PyDelta_Check) PyDateTime_IMPORT from pandas._libs.tslibs.ccalendar import DAY_SECONDS, HOUR_SECONDS @@ -28,7 +29,8 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.util cimport ( is_string_object, is_datetime64_object, is_integer_object, is_float_object) -from pandas._libs.tslibs.timedeltas cimport cast_from_unit +from pandas._libs.tslibs.timedeltas cimport (cast_from_unit, + delta_to_nanoseconds) from pandas._libs.tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, get_utcoffset, get_dst_info, get_timezone, maybe_get_tz, tz_compare) @@ -868,7 +870,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, - bool if True, treat all vals as DST. If False, treat them as non-DST - 'NaT' will return NaT where there are ambiguous times - nonexistent : {None, "NaT", "shift", "raise"} + nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", + timedelta-like} How to handle non-existent times when converting wall times to UTC .. 
versionadded:: 0.24.0 @@ -884,12 +887,14 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right int64_t *tdata int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins - int64_t HOURS_NS = HOUR_SECONDS * 1000000000 + int64_t first_delta + int64_t HOURS_NS = HOUR_SECONDS * 1000000000, shift_delta = 0 ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta ndarray trans_idx, grp, a_idx, b_idx, one_diff npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False - bint shift = False, fill_nonexist = False + bint shift_forward = False, shift_backward = False + bint fill_nonexist = False list trans_grp str stamp @@ -928,11 +933,16 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if nonexistent == 'NaT': fill_nonexist = True - elif nonexistent == 'shift': - shift = True - else: - assert nonexistent in ('raise', None), ("nonexistent must be one of" - " {'NaT', 'raise', 'shift'}") + elif nonexistent == 'shift_forward': + shift_forward = True + elif nonexistent == 'shift_backward': + shift_backward = True + elif PyDelta_Check(nonexistent): + shift_delta = delta_to_nanoseconds(nonexistent) + elif nonexistent not in ('raise', None): + msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " + "shift_backwards} or a timedelta object") + raise ValueError(msg) trans, deltas, _ = get_dst_info(tz) @@ -1041,15 +1051,35 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result[i] = right else: # Handle nonexistent times - if shift: - # Shift the nonexistent time forward to the closest existing - # time + if shift_forward or shift_backward or shift_delta != 0: + # Shift the nonexistent time to the closest existing time remaining_mins = val % HOURS_NS - new_local = val + (HOURS_NS - remaining_mins) + if shift_delta != 0: + # Validate that we don't relocalize on another 
nonexistent + # time + if -1 < shift_delta + remaining_mins < HOURS_NS: + raise ValueError( + "The provided timedelta will relocalize on a " + "nonexistent time: {}".format(nonexistent) + ) + new_local = val + shift_delta + elif shift_forward: + new_local = val + (HOURS_NS - remaining_mins) + else: + # Subtract 1 since the beginning hour is _inclusive_ of + # nonexistent times + new_local = val - remaining_mins - 1 delta_idx = trans.searchsorted(new_local, side='right') - # Need to subtract 1 from the delta_idx if the UTC offset of - # the target tz is greater than 0 - delta_idx_offset = int(deltas[0] > 0) + # Shift the delta_idx by 1 if the UTC offset of + # the target tz is greater than 0 and we're moving forward + # or vice versa + first_delta = deltas[0] + if (shift_forward or shift_delta > 0) and first_delta > 0: + delta_idx_offset = 1 + elif (shift_backward or shift_delta < 0) and first_delta < 0: + delta_idx_offset = 1 + else: + delta_idx_offset = 0 delta_idx = delta_idx - delta_idx_offset result[i] = new_local - deltas[delta_idx] elif fill_nonexist: diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index cc06e9857a44f..604599f895476 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -481,13 +481,17 @@ class NaTType(_NaT): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
- - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -515,13 +519,17 @@ class NaTType(_NaT): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -545,13 +553,17 @@ class NaTType(_NaT): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
- - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -605,13 +617,17 @@ class NaTType(_NaT): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4761c7ff1f4eb..fe0564cb62c30 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -9,7 +9,7 @@ cimport numpy as cnp from numpy cimport int64_t, int32_t, int8_t cnp.import_array() -from datetime import time as datetime_time +from datetime import time as datetime_time, timedelta from cpython.datetime cimport (datetime, PyDateTime_Check, PyDelta_Check, PyTZInfo_Check, PyDateTime_IMPORT) @@ -789,13 +789,17 @@ class Timestamp(_Timestamp): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. 
versionadded:: 0.24.0 - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -827,13 +831,17 @@ class Timestamp(_Timestamp): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -859,13 +867,17 @@ class Timestamp(_Timestamp): - 'raise' will raise an AmbiguousTimeError for an ambiguous time .. versionadded:: 0.24.0 - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
- - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -1060,13 +1072,17 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -1106,9 +1122,13 @@ class Timestamp(_Timestamp): raise ValueError("The errors argument must be either 'coerce' " "or 'raise'.") - if nonexistent not in ('raise', 'NaT', 'shift'): + nonexistent_options = ('raise', 'NaT', 'shift_forward', + 'shift_backward') + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta): raise ValueError("The nonexistent argument must be one of 'raise'," - " 'NaT' or 'shift'") + " 'NaT', 'shift_forward', 'shift_backward' or" + " a timedelta object") if self.tzinfo is None: # tz naive, localize diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 
517c80619baea..2146217ffdd76 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -233,13 +233,17 @@ class TimelikeOps(object): .. versionadded:: 0.24.0 - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift' will shift the nonexistent time forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ea2742c5808a3..fcf93a304b7a3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from datetime import datetime, time +from datetime import datetime, time, timedelta import warnings import numpy as np @@ -842,13 +842,17 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - nonexistent : 'shift', 'NaT' default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
- - 'shift' will shift the nonexistent times forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -936,6 +940,25 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', 1 2018-10-28 02:36:00+02:00 2 2018-10-28 03:46:00+01:00 dtype: datetime64[ns, CET] + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backward'`. + >>> s = pd.to_datetime(pd.Series([ + ... '2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'])) + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] """ if errors is not None: warnings.warn("The errors argument is deprecated and will be " @@ -950,9 +973,13 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', raise ValueError("The errors argument must be either 'coerce' " "or 'raise'.") - if nonexistent not in ('raise', 'NaT', 'shift'): + nonexistent_options = ('raise', 'NaT', 'shift_forward', + 'shift_backward') + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta): raise ValueError("The nonexistent argument must be one of 'raise'," - " 'NaT' or 'shift'") 
+ " 'NaT', 'shift_forward', 'shift_backward' or" + " a timedelta object") if self.tz is not None: if tz is None: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3e782c6ef89e0..d0555bd2e44b1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,5 +1,6 @@ # pylint: disable=W0231,E1101 import collections +from datetime import timedelta import functools import gc import json @@ -9249,13 +9250,17 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - nonexistent : 'shift', 'NaT', default 'raise' + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, + default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift' will shift the nonexistent times forward to the closest - existing time + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta - 'raise' will raise an NonExistentTimeError if there are nonexistent times @@ -9313,10 +9318,33 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, 2018-10-28 02:36:00+02:00 1 2018-10-28 03:46:00+01:00 2 dtype: int64 + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backward'`. + >>> s = pd.Series(range(2), index=pd.DatetimeIndex([ + ... 
'2015-03-29 03:30:00'])) + >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 2015-03-29 03:00:00+02:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 2015-03-29 01:59:59.999999999+01:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) + 2015-03-29 03:30:00+02:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 """ - if nonexistent not in ('raise', 'NaT', 'shift'): + nonexistent_options = ('raise', 'NaT', 'shift_forward', + 'shift_backward') + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta): raise ValueError("The nonexistent argument must be one of 'raise'," - " 'NaT' or 'shift'") + " 'NaT', 'shift_forward', 'shift_backward' or" + " a timedelta object") axis = self._get_axis_number(axis) ax = self._get_axis(axis) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index d36a3fd6f61bb..ee9137c264edc 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1407,7 +1407,7 @@ def _get_time_bins(self, ax): tz=ax.tz, name=ax.name, ambiguous='infer', - nonexistent='shift') + nonexistent='shift_forward') ax_values = ax.asi8 binner, bin_edges = self._adjust_bin_edges(binner, ax_values) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 42385127f0dad..8bcc9296cb010 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -581,7 +581,6 @@ def test_dti_tz_localize_bdate_range(self): @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) @pytest.mark.parametrize('method, exp', [ - ['shift', '2015-03-29 03:00:00'], ['NaT', pd.NaT], ['raise', None], ['foo', 'invalid'] @@ -601,6 +600,47 @@ def test_dti_tz_localize_nonexistent(self, tz, method, exp): expected = DatetimeIndex([exp] * n, tz=tz) tm.assert_index_equal(result, 
expected) + @pytest.mark.parametrize('start_ts, tz, end_ts, shift', [ + ['2015-03-29 02:20:00', 'Europe/Warsaw', '2015-03-29 03:00:00', + 'forward'], + ['2015-03-29 02:20:00', 'Europe/Warsaw', + '2015-03-29 01:59:59.999999999', 'backward'], + ['2015-03-29 02:20:00', 'Europe/Warsaw', + '2015-03-29 03:20:00', timedelta(hours=1)], + ['2015-03-29 02:20:00', 'Europe/Warsaw', + '2015-03-29 01:20:00', timedelta(hours=-1)], + ['2018-03-11 02:33:00', 'US/Pacific', '2018-03-11 03:00:00', + 'forward'], + ['2018-03-11 02:33:00', 'US/Pacific', '2018-03-11 01:59:59.999999999', + 'backward'], + ['2018-03-11 02:33:00', 'US/Pacific', '2018-03-11 03:33:00', + timedelta(hours=1)], + ['2018-03-11 02:33:00', 'US/Pacific', '2018-03-11 01:33:00', + timedelta(hours=-1)] + ]) + @pytest.mark.parametrize('tz_type', ['', 'dateutil/']) + def test_dti_tz_localize_nonexistent_shift(self, start_ts, tz, + end_ts, shift, + tz_type): + # GH 8917 + tz = tz_type + tz + if isinstance(shift, str): + shift = 'shift_' + shift + dti = DatetimeIndex([Timestamp(start_ts)]) + result = dti.tz_localize(tz, nonexistent=shift) + expected = DatetimeIndex([Timestamp(end_ts)]).tz_localize(tz) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('offset', [-1, 1]) + @pytest.mark.parametrize('tz_type', ['', 'dateutil/']) + def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): + # GH 8917 + tz = tz_type + 'Europe/Warsaw' + dti = DatetimeIndex([Timestamp('2015-03-29 02:20:00')]) + msg = "The provided timedelta will relocalize on a nonexistent time" + with pytest.raises(ValueError, match=msg): + dti.tz_localize(tz, nonexistent=timedelta(seconds=offset)) + @pytest.mark.filterwarnings('ignore::FutureWarning') def test_dti_tz_localize_errors_deprecation(self): # GH 22644 diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 60cf613a5f2c6..7b57a280c56fc 100644 --- a/pandas/tests/resample/test_period_index.py +++ 
b/pandas/tests/resample/test_period_index.py @@ -291,8 +291,8 @@ def test_resample_nonexistent_time_bin_edge(self): df = DataFrame(data=list(range(len(index))), index=index) result = df.groupby(pd.Grouper(freq='1D')).count() expected = date_range(start='2017-10-09', end='2017-10-20', freq='D', - tz="America/Sao_Paulo", nonexistent='shift', - closed='left') + tz="America/Sao_Paulo", + nonexistent='shift_forward', closed='left') tm.assert_index_equal(result.index, expected) def test_resample_ambiguous_time_bin_edge(self): diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index c02dc1083c366..bc67a3e72f8d0 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -183,14 +183,48 @@ def test_timestamp_tz_localize(self, tz): assert result.hour == expected.hour assert result == expected - @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) - def test_timestamp_tz_localize_nonexistent_shift(self, tz): - # GH 8917 - ts = Timestamp('2015-03-29 02:20:00') - result = ts.tz_localize(tz, nonexistent='shift') - expected = Timestamp('2015-03-29 03:00:00').tz_localize(tz) + @pytest.mark.parametrize('start_ts, tz, end_ts, shift', [ + ['2015-03-29 02:20:00', 'Europe/Warsaw', '2015-03-29 03:00:00', + 'forward'], + ['2015-03-29 02:20:00', 'Europe/Warsaw', + '2015-03-29 01:59:59.999999999', 'backward'], + ['2015-03-29 02:20:00', 'Europe/Warsaw', + '2015-03-29 03:20:00', timedelta(hours=1)], + ['2015-03-29 02:20:00', 'Europe/Warsaw', + '2015-03-29 01:20:00', timedelta(hours=-1)], + ['2018-03-11 02:33:00', 'US/Pacific', '2018-03-11 03:00:00', + 'forward'], + ['2018-03-11 02:33:00', 'US/Pacific', '2018-03-11 01:59:59.999999999', + 'backward'], + ['2018-03-11 02:33:00', 'US/Pacific', '2018-03-11 03:33:00', + timedelta(hours=1)], + ['2018-03-11 02:33:00', 'US/Pacific', '2018-03-11 01:33:00', + timedelta(hours=-1)] + ]) + 
@pytest.mark.parametrize('tz_type', ['', 'dateutil/']) + def test_timestamp_tz_localize_nonexistent_shift(self, start_ts, tz, + end_ts, shift, + tz_type): + # GH 8917, 24466 + tz = tz_type + tz + if isinstance(shift, str): + shift = 'shift_' + shift + ts = Timestamp(start_ts) + result = ts.tz_localize(tz, nonexistent=shift) + expected = Timestamp(end_ts).tz_localize(tz) assert result == expected + @pytest.mark.parametrize('offset', [-1, 1]) + @pytest.mark.parametrize('tz_type', ['', 'dateutil/']) + def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, + tz_type): + # GH 8917, 24466 + tz = tz_type + 'Europe/Warsaw' + ts = Timestamp('2015-03-29 02:20:00') + msg = "The provided timedelta will relocalize on a nonexistent time" + with pytest.raises(ValueError, match=msg): + ts.tz_localize(tz, nonexistent=timedelta(seconds=offset)) + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) def test_timestamp_tz_localize_nonexistent_NaT(self, tz): # GH 8917 diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index d3ca85df3fd4f..6fc6aa98fe950 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -163,7 +163,7 @@ def test_round_dst_border_ambiguous(self, method): def test_round_dst_border_nonexistent(self, method, ts_str, freq): # GH 23324 round near "spring forward" DST ts = Timestamp(ts_str, tz='America/Chicago') - result = getattr(ts, method)(freq, nonexistent='shift') + result = getattr(ts, method)(freq, nonexistent='shift_forward') expected = Timestamp('2018-03-11 03:00:00', tz='America/Chicago') assert result == expected diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 895717835c630..52b72bcafe555 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -289,7 +289,7 @@ def 
test_dt_round_tz_ambiguous(self, method): def test_dt_round_tz_nonexistent(self, method, ts_str, freq): # GH 23324 round near "spring forward" DST s = Series([pd.Timestamp(ts_str, tz='America/Chicago')]) - result = getattr(s.dt, method)(freq, nonexistent='shift') + result = getattr(s.dt, method)(freq, nonexistent='shift_forward') expected = Series( [pd.Timestamp('2018-03-11 03:00:00', tz='America/Chicago')] ) diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 2e52f7ddbac9c..7f49f94ef57ce 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -79,7 +79,7 @@ def test_series_tz_localize_ambiguous_bool(self): @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) @pytest.mark.parametrize('method, exp', [ - ['shift', '2015-03-29 03:00:00'], + ['shift_forward', '2015-03-29 03:00:00'], ['NaT', NaT], ['raise', None], ['foo', 'invalid']