diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 85b0abe421eb2..a52c80106f100 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -2357,6 +2357,38 @@ constructor as well as ``tz_localize``. # tz_convert(None) is identical with tz_convert('UTC').tz_localize(None) didx.tz_convert('UCT').tz_localize(None) +.. _timeseries.timezone_nonexistent: + +Nonexistent Times when Localizing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A DST transition may also shift the local time ahead by 1 hour creating nonexistent +local times. The behavior of localizing a timeseries with nonexistent times +can be controlled by the ``nonexistent`` argument. The following options are available: + +* ``raise``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) +* ``NaT``: Replaces nonexistent times with ``NaT`` +* ``shift``: Shifts nonexistent times forward to the closest real time + +.. ipython:: python + dti = date_range(start='2015-03-29 01:30:00', periods=3, freq='H') + # 2:30 is a nonexistent time + +Localization of nonexistent times will raise an error by default. + +.. code-block:: ipython + + In [2]: dti.tz_localize('Europe/Warsaw') + NonExistentTimeError: 2015-03-29 02:30:00 + +Transform nonexistent times to ``NaT`` or the closest real time forward in time. + +.. ipython:: python + dti + dti.tz_localize('Europe/Warsaw', nonexistent='shift') + dti.tz_localize('Europe/Warsaw', nonexistent='NaT') + + .. _timeseries.timezone_series: TZ Aware Dtypes diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 4f17133ef4a8c..ef576223db4d1 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -205,6 +205,7 @@ Other Enhancements - New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). - Compatibility with Matplotlib 3.0 (:issue:`22790`). 
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) .. _whatsnew_0240.api_breaking: @@ -912,6 +913,7 @@ Deprecations - :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) - :func:`DatetimeIndex.shift` and :func:`PeriodIndex.shift` now accept ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`, :issue:`22912`) - The ``fastpath`` keyword of the different Index constructors is deprecated (:issue:`23110`). +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``nonexistent`` argument (:issue:`8917`) .. 
_whatsnew_0240.prior_deprecations: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index d7eef546befbd..f9c604cd76472 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - import cython from cython import Py_ssize_t @@ -44,6 +43,7 @@ from nattype cimport NPY_NAT, checknull_with_nat # Constants cdef int64_t DAY_NS = 86400000000000LL +cdef int64_t HOURS_NS = 3600000000000 NS_DTYPE = np.dtype('M8[ns]') TD_DTYPE = np.dtype('m8[ns]') @@ -458,8 +458,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, if tz is not None: # shift for localize_tso ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, - ambiguous='raise', - errors='raise')[0] + ambiguous='raise')[0] except OutOfBoundsDatetime: # GH#19382 for just-barely-OutOfBounds falling back to dateutil @@ -826,7 +825,7 @@ def tz_convert(int64_t[:] vals, object tz1, object tz2): @cython.boundscheck(False) @cython.wraparound(False) def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, - object errors='raise'): + object nonexistent=None): """ Localize tzinfo-naive i8 to given time zone (using pytz). If there are ambiguities in the values, raise AmbiguousTimeError. @@ -837,7 +836,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, tz : tzinfo or None ambiguous : str, bool, or arraylike If arraylike, must have the same length as vals - errors : {"raise", "coerce"}, default "raise" + nonexistent : {None, "NaT", "shift", "raise"} + How to handle nonexistent times; None is treated like "raise" + + .. 
versionadded:: 0.24.0 Returns ------- @@ -849,16 +851,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, ndarray ambiguous_array Py_ssize_t i, idx, pos, ntrans, n = len(vals) int64_t *tdata - int64_t v, left, right + int64_t v, left, right, val, v_left, v_right ndarray[int64_t] result, result_a, result_b, dst_hours npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False - bint is_coerce = errors == 'coerce', is_raise = errors == 'raise' + bint shift = False, fill_nonexist = False # Vectorized version of DstTzInfo.localize - - assert is_coerce or is_raise - if tz == UTC or tz is None: return vals @@ -888,39 +887,45 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, "the same size as vals") ambiguous_array = np.asarray(ambiguous) + if nonexistent == 'NaT': + fill_nonexist = True + elif nonexistent == 'shift': + shift = True + else: + assert nonexistent in ('raise', None), ("nonexistent must be one of" + " {'NaT', 'raise', 'shift'}") + trans, deltas, typ = get_dst_info(tz) tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) result_a = np.empty(n, dtype=np.int64) result_b = np.empty(n, dtype=np.int64) result_a.fill(NPY_NAT) result_b.fill(NPY_NAT) - # left side - idx_shifted = (np.maximum(0, trans.searchsorted( + idx_shifted_left = (np.maximum(0, trans.searchsorted( vals - DAY_NS, side='right') - 1)).astype(np.int64) - for i in range(n): - v = vals[i] - deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 - - # timestamp falls to the left side of the DST transition - if v + deltas[pos] == vals[i]: - result_a[i] = v - - # right side - idx_shifted = (np.maximum(0, trans.searchsorted( + idx_shifted_right = (np.maximum(0, trans.searchsorted( vals + DAY_NS, side='right') - 1)).astype(np.int64) for i in range(n): - v = vals[i] - 
deltas[idx_shifted[i]] - pos = bisect_right_i8(tdata, v, ntrans) - 1 + val = vals[i] + v_left = val - deltas[idx_shifted_left[i]] + pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 + # timestamp falls to the left side of the DST transition + if v_left + deltas[pos_left] == val: + result_a[i] = v_left + v_right = val - deltas[idx_shifted_right[i]] + pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 # timestamp falls to the right side of the DST transition - if v + deltas[pos] == vals[i]: - result_b[i] = v + if v_right + deltas[pos_right] == val: + result_b[i] = v_right if infer_dst: dst_hours = np.empty(n, dtype=np.int64) @@ -935,7 +940,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, stamp = _render_tstamp(vals[trans_idx]) raise pytz.AmbiguousTimeError( - "Cannot infer dst time from %s as there " - "are no repeated times" % stamp) + "Cannot infer dst time from {} as there " + "are no repeated times".format(stamp)) # Split the array into contiguous chunks (where the difference between # indices is 1). These are effectively dst transitions in different # years which is useful for checking that there is not an ambiguous @@ -960,7 +965,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if switch_idx.size > 1: raise pytz.AmbiguousTimeError( - "There are %i dst switches when " - "there should only be 1." 
% switch_idx.size) + "There are {} dst switches when " + "there should only be 1.".format(switch_idx.size)) switch_idx = switch_idx[0] + 1 # Pull the only index and adjust a_idx = grp[:switch_idx] @@ -968,10 +973,11 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) for i in range(n): + val = vals[i] left = result_a[i] right = result_b[i] - if vals[i] == NPY_NAT: - result[i] = vals[i] + if val == NPY_NAT: + result[i] = val elif left != NPY_NAT and right != NPY_NAT: if left == right: result[i] = left @@ -986,19 +992,27 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, elif fill: result[i] = NPY_NAT else: - stamp = _render_tstamp(vals[i]) + stamp = _render_tstamp(val) raise pytz.AmbiguousTimeError( - "Cannot infer dst time from %r, try using the " - "'ambiguous' argument" % stamp) + "Cannot infer dst time from {!r}, try using the " + "'ambiguous' argument".format(stamp)) elif left != NPY_NAT: result[i] = left elif right != NPY_NAT: result[i] = right else: - if is_coerce: + # Handle nonexistent times + if shift: + # Shift the nonexistent time forward to the closest existing + # time + remaining_minutes = val % HOURS_NS + new_local = val + (HOURS_NS - remaining_minutes) + delta_idx = trans.searchsorted(new_local, side='right') - 1 + result[i] = new_local - deltas[delta_idx] + elif fill_nonexist: result[i] = NPY_NAT else: - stamp = _render_tstamp(vals[i]) + stamp = _render_tstamp(val) raise pytz.NonExistentTimeError(stamp) return result diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index ae4f9c821b5d1..0eec84ecf8285 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -564,14 +564,26 @@ class NaTType(_NaT): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - errors : 'raise', 'coerce', default 'raise' + nonexistent : 
'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone 
+ where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. versionadded:: 0.24.0 + + errors : 'raise', 'coerce', default None - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from - or to DST time) + or to DST time). Use ``nonexistent='raise'`` instead. - 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone + into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. versionadded:: 0.19.0 + .. deprecated:: 0.24.0 Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 0c2753dbc6f28..08b0c5472549e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -961,7 +961,8 @@ class Timestamp(_Timestamp): def is_leap_year(self): return bool(ccalendar.is_leapyear(self.year)) - def tz_localize(self, tz, ambiguous='raise', errors='raise'): + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', + errors=None): """ Convert naive Timestamp to local time zone, or remove timezone from tz-aware Timestamp. @@ -978,14 +979,26 @@ class Timestamp(_Timestamp): - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time - errors : 'raise', 'coerce', default 'raise' + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. 
versionadded:: 0.24.0 + + errors : 'raise', 'coerce', default None - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from - or to DST time) + or to DST time). Use ``nonexistent='raise'`` instead. - 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone + into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. versionadded:: 0.19.0 + .. deprecated:: 0.24.0 Returns ------- @@ -999,13 +1012,31 @@ class Timestamp(_Timestamp): if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') + if errors is not None: + warnings.warn("The errors argument is deprecated and will be " + "removed in a future release. Use " + "nonexistent='NaT' or nonexistent='raise' " + "instead.", FutureWarning) + if errors == 'coerce': + nonexistent = 'NaT' + elif errors == 'raise': + nonexistent = 'raise' + else: + raise ValueError("The errors argument must be either 'coerce' " + "or 'raise'.") + + if nonexistent not in ('raise', 'NaT', 'shift'): + raise ValueError("The nonexistent argument must be one of 'raise'," + " 'NaT' or 'shift'") + if self.tzinfo is None: # tz naive, localize tz = maybe_get_tz(tz) if not is_string_object(ambiguous): ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, - ambiguous=ambiguous, errors=errors)[0] + ambiguous=ambiguous, + nonexistent=nonexistent)[0] return Timestamp(value, tz=tz) else: if tz is None: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ac90483513af5..b6574c121c087 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -611,7 +611,8 @@ def tz_convert(self, tz): # No conversion since timestamps are all UTC to begin with return self._shallow_copy(tz=tz) - def tz_localize(self, tz, ambiguous='raise', errors='raise'): + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', + errors=None): 
""" Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -627,8 +628,7 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): tz : string, pytz.timezone, dateutil.tz.tzfile or None Time zone to convert timestamps to. Passing ``None`` will remove the time zone information preserving local time. - ambiguous : str {'infer', 'NaT', 'raise'} or bool array, - default 'raise' + ambiguous : 'infer', 'NaT', bool array, default 'raise' - 'infer' will attempt to infer fall dst-transition hours based on order @@ -639,15 +639,27 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - errors : {'raise', 'coerce'}, default 'raise' + nonexistent : 'shift', 'NaT' default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent times forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. versionadded:: 0.24.0 + + errors : {'raise', 'coerce'}, default None - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified time zone (e.g. due to a transition from - or to DST time) + or to DST time). Use ``nonexistent='raise'`` instead. - 'coerce' will return NaT if the timestamp can not be converted - to the specified time zone + to the specified time zone. Use ``nonexistent='NaT'`` instead. - .. versionadded:: 0.19.0 + .. deprecated:: 0.24.0 Returns ------- @@ -689,6 +701,23 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): '2018-03-03 09:00:00'], dtype='datetime64[ns]', freq='D') """ + if errors is not None: + warnings.warn("The errors argument is deprecated and will be " + "removed in a future release. 
Use " + "nonexistent='NaT' or nonexistent='raise' " + "instead.", FutureWarning) + if errors == 'coerce': + nonexistent = 'NaT' + elif errors == 'raise': + nonexistent = 'raise' + else: + raise ValueError("The errors argument must be either 'coerce' " + "or 'raise'.") + + if nonexistent not in ('raise', 'NaT', 'shift'): + raise ValueError("The nonexistent argument must be one of 'raise'," + " 'NaT' or 'shift'") + if self.tz is not None: if tz is None: new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz) @@ -698,9 +727,9 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): tz = timezones.maybe_get_tz(tz) # Convert to UTC - new_dates = conversion.tz_localize_to_utc(self.asi8, tz, - ambiguous=ambiguous, - errors=errors) + new_dates = conversion.tz_localize_to_utc( + self.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent, + ) new_dates = new_dates.view(_NS_DTYPE) return self._shallow_copy(new_dates, tz=tz) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 31b700abcfdb3..c24872d7c89e9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8627,7 +8627,7 @@ def _tz_convert(ax, tz): return result.__finalize__(self) def tz_localize(self, tz, axis=0, level=None, copy=True, - ambiguous='raise'): + ambiguous='raise', nonexistent='raise'): """ Localize tz-naive TimeSeries to target time zone. @@ -8649,6 +8649,17 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent times forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. 
versionadded:: 0.24.0 Returns ------- @@ -8658,10 +8669,14 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, TypeError If the TimeSeries is tz-aware and tz is not None. """ + if nonexistent not in ('raise', 'NaT', 'shift'): + raise ValueError("The nonexistent argument must be one of 'raise'," + " 'NaT' or 'shift'") + axis = self._get_axis_number(axis) ax = self._get_axis(axis) - def _tz_localize(ax, tz, ambiguous): + def _tz_localize(ax, tz, ambiguous, nonexistent): if not hasattr(ax, 'tz_localize'): if len(ax) > 0: ax_name = self._get_axis_name(axis) @@ -8670,19 +8685,23 @@ def _tz_localize(ax, tz, ambiguous): else: ax = DatetimeIndex([], tz=tz) else: - ax = ax.tz_localize(tz, ambiguous=ambiguous) + ax = ax.tz_localize( + tz, ambiguous=ambiguous, nonexistent=nonexistent + ) return ax # if a level is given it must be a MultiIndex level or # equivalent to the axis name if isinstance(ax, MultiIndex): level = ax._get_level_number(level) - new_level = _tz_localize(ax.levels[level], tz, ambiguous) + new_level = _tz_localize( + ax.levels[level], tz, ambiguous, nonexistent + ) ax = ax.set_levels(new_level, level=level) else: if level not in (None, 0, ax.name): raise ValueError("The level {0} is not valid".format(level)) - ax = _tz_localize(ax, tz, ambiguous) + ax = _tz_localize(ax, tz, ambiguous, nonexistent) result = self._constructor(self._data, copy=copy) result.set_axis(ax, axis=axis, inplace=True) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index dc01f7ccbd496..1369783657f92 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -312,9 +312,13 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): index.tz_localize(tz=tz) with pytest.raises(pytz.NonExistentTimeError): - index.tz_localize(tz=tz, errors='raise') + with tm.assert_produces_warning(FutureWarning): + index.tz_localize(tz=tz, errors='raise') - result = 
index.tz_localize(tz=tz, errors='coerce') + with tm.assert_produces_warning(FutureWarning, + clear=FutureWarning, + check_stacklevel=False): + result = index.tz_localize(tz=tz, errors='coerce') test_times = ['2015-03-08 01:00-05:00', 'NaT', '2015-03-08 03:00-04:00'] dti = to_datetime(test_times, utc=True) @@ -574,6 +578,42 @@ def test_dti_tz_localize_bdate_range(self): localized = dr.tz_localize(pytz.utc) tm.assert_index_equal(dr_utc, localized) + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + @pytest.mark.parametrize('method, exp', [ + ['shift', '2015-03-29 03:00:00'], + ['NaT', pd.NaT], + ['raise', None], + ['foo', 'invalid'] + ]) + def test_dti_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') + if method == 'raise': + with pytest.raises(pytz.NonExistentTimeError): + dti.tz_localize(tz, nonexistent=method) + elif exp == 'invalid': + with pytest.raises(ValueError): + dti.tz_localize(tz, nonexistent=method) + else: + result = dti.tz_localize(tz, nonexistent=method) + expected = DatetimeIndex([exp] * n, tz=tz) + tm.assert_index_equal(result, expected) + + @pytest.mark.filterwarnings('ignore::FutureWarning') + def test_dti_tz_localize_errors_deprecation(self): + # GH 22644 + tz = 'Europe/Warsaw' + n = 60 + dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(ValueError): + dti.tz_localize(tz, errors='foo') + # make sure errors='coerce' gets mapped correctly to nonexistent + result = dti.tz_localize(tz, errors='coerce') + expected = dti.tz_localize(tz, nonexistent='NaT') + tm.assert_index_equal(result, expected) + # ------------------------------------------------------------- # DatetimeIndex.normalize diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 
8cebfafeae82a..827ad3581cd49 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -79,20 +79,44 @@ def test_tz_localize_ambiguous(self): ('2015-03-08 02:30', 'US/Pacific'), ('2015-03-29 02:00', 'Europe/Paris'), ('2015-03-29 02:30', 'Europe/Belgrade')]) + @pytest.mark.filterwarnings('ignore::FutureWarning') def test_tz_localize_nonexistent(self, stamp, tz): # GH#13057 ts = Timestamp(stamp) with pytest.raises(NonExistentTimeError): ts.tz_localize(tz) + # GH 22644 with pytest.raises(NonExistentTimeError): - ts.tz_localize(tz, errors='raise') - assert ts.tz_localize(tz, errors='coerce') is NaT + with tm.assert_produces_warning(FutureWarning): + ts.tz_localize(tz, errors='raise') + with tm.assert_produces_warning(FutureWarning): + assert ts.tz_localize(tz, errors='coerce') is NaT def test_tz_localize_errors_ambiguous(self): # GH#13057 ts = Timestamp('2015-11-1 01:00') with pytest.raises(AmbiguousTimeError): - ts.tz_localize('US/Pacific', errors='coerce') + with tm.assert_produces_warning(FutureWarning): + ts.tz_localize('US/Pacific', errors='coerce') + + @pytest.mark.filterwarnings('ignore::FutureWarning') + def test_tz_localize_errors_invalid_arg(self): + # GH 22644 + tz = 'Europe/Warsaw' + ts = Timestamp('2015-03-29 02:00:00') + with pytest.raises(ValueError): + with tm.assert_produces_warning(FutureWarning): + ts.tz_localize(tz, errors='foo') + + def test_tz_localize_errors_coerce(self): + # GH 22644 + # make sure errors='coerce' gets mapped correctly to nonexistent + tz = 'Europe/Warsaw' + ts = Timestamp('2015-03-29 02:00:00') + with tm.assert_produces_warning(FutureWarning): + result = ts.tz_localize(tz, errors='coerce') + expected = ts.tz_localize(tz, nonexistent='NaT') + assert result is expected @pytest.mark.parametrize('stamp', ['2014-02-01 09:00', '2014-07-08 09:00', '2014-11-01 17:00', '2014-11-05 00:00']) @@ -158,6 +182,30 @@ def test_timestamp_tz_localize(self, tz): assert result.hour == 
expected.hour assert result == expected + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + def test_timestamp_tz_localize_nonexistent_shift(self, tz): + # GH 8917 + ts = Timestamp('2015-03-29 02:20:00') + result = ts.tz_localize(tz, nonexistent='shift') + expected = Timestamp('2015-03-29 03:00:00').tz_localize(tz) + assert result == expected + + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + def test_timestamp_tz_localize_nonexistent_NaT(self, tz): + # GH 8917 + ts = Timestamp('2015-03-29 02:20:00') + result = ts.tz_localize(tz, nonexistent='NaT') + assert result is NaT + + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + def test_timestamp_tz_localize_nonexistent_raise(self, tz): + # GH 8917 + ts = Timestamp('2015-03-29 02:20:00') + with pytest.raises(pytz.NonExistentTimeError): + ts.tz_localize(tz, nonexistent='raise') + with pytest.raises(ValueError): + ts.tz_localize(tz, nonexistent='foo') + # ------------------------------------------------------------------ # Timestamp.tz_convert diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 472b2c5644fa5..8c1ea6bff5f4d 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -13,7 +13,7 @@ from pandas._libs.tslibs import timezones, conversion from pandas.compat import lrange from pandas.core.indexes.datetimes import date_range -from pandas import Series, Timestamp, DatetimeIndex, Index +from pandas import Series, Timestamp, DatetimeIndex, Index, NaT class TestSeriesTimezones(object): @@ -33,6 +33,21 @@ def test_series_tz_localize(self): tm.assert_raises_regex(TypeError, 'Already tz-aware', ts.tz_localize, 'US/Eastern') + @pytest.mark.filterwarnings('ignore::FutureWarning') + def test_tz_localize_errors_deprecation(self): + # GH 22644 + tz = 'Europe/Warsaw' + n = 60 + rng = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') + ts = 
Series(rng) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(ValueError): + ts.dt.tz_localize(tz, errors='foo') + # make sure errors='coerce' gets mapped correctly to nonexistent + result = ts.dt.tz_localize(tz, errors='coerce') + expected = ts.dt.tz_localize(tz, nonexistent='NaT') + tm.assert_series_equal(result, expected) + def test_series_tz_localize_ambiguous_bool(self): # make sure that we are correctly accepting bool values as ambiguous @@ -60,6 +75,29 @@ def test_series_tz_localize_ambiguous_bool(self): result = ser.dt.tz_localize('US/Central', ambiguous=[False]) tm.assert_series_equal(result, expected1) + @pytest.mark.parametrize('tz', ['Europe/Warsaw', 'dateutil/Europe/Warsaw']) + @pytest.mark.parametrize('method, exp', [ + ['shift', '2015-03-29 03:00:00'], + ['NaT', NaT], + ['raise', None], + ['foo', 'invalid'] + ]) + def test_series_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start='2015-03-29 02:00:00', periods=n, freq='min') + s = Series(1, dti) + if method == 'raise': + with pytest.raises(pytz.NonExistentTimeError): + s.tz_localize(tz, nonexistent=method) + elif exp == 'invalid': + with pytest.raises(ValueError): + s.tz_localize(tz, nonexistent=method) + else: + result = s.tz_localize(tz, nonexistent=method) + expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) def test_series_tz_localize_empty(self, tzstr): # GH#2248