diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 4d0384abbf0c6..ccc34678a9f2c 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -279,6 +279,7 @@ Datetimelike - Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`) - Bug in comparisons between :class:`Timestamp` object and ``datetime64`` objects just outside the implementation bounds for nanosecond ``datetime64`` (:issue:`39221`) - Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`) +- Bug in :meth:`Timedelta.round`, :meth:`Timedelta.floor`, :meth:`Timedelta.ceil` for values near the implementation bounds of :class:`Timedelta` (:issue:`38964`) - Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`) Timedelta diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 57404b99c7628..2f25df9144f32 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -636,3 +636,154 @@ def get_locale_names(name_type: str, locale: object = None): """ with set_locale(locale, LC_TIME): return getattr(LocaleTime(), name_type) + + +# --------------------------------------------------------------------- +# Rounding + + +class RoundTo: + """ + enumeration defining the available rounding modes + + Attributes + ---------- + MINUS_INFTY + round towards -∞, or floor [2]_ + PLUS_INFTY + round towards +∞, or ceil [3]_ + NEAREST_HALF_EVEN + round to nearest, tie-break half to even [6]_ + NEAREST_HALF_MINUS_INFTY + round to nearest, tie-break half to -∞ [5]_ + NEAREST_HALF_PLUS_INFTY + round to nearest, tie-break half to +∞ [4]_ + + + References + ---------- + .. [1] "Rounding - Wikipedia" + https://en.wikipedia.org/wiki/Rounding + .. [2] "Rounding down" + https://en.wikipedia.org/wiki/Rounding#Rounding_down + .. [3] "Rounding up" + https://en.wikipedia.org/wiki/Rounding#Rounding_up + .. [4] "Round half up" + https://en.wikipedia.org/wiki/Rounding#Round_half_up + .. [5] "Round half down" + https://en.wikipedia.org/wiki/Rounding#Round_half_down + .. [6] "Round half to even" + https://en.wikipedia.org/wiki/Rounding#Round_half_to_even + """ + @property + def MINUS_INFTY(self) -> int: + return 0 + + @property + def PLUS_INFTY(self) -> int: + return 1 + + @property + def NEAREST_HALF_EVEN(self) -> int: + return 2 + + @property + def NEAREST_HALF_PLUS_INFTY(self) -> int: + return 3 + + @property + def NEAREST_HALF_MINUS_INFTY(self) -> int: + return 4 + + +cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + if value == NPY_NAT: + res = NPY_NAT + else: + res = value - value % unit + result[i] = res + + return result + + +cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] result = np.empty(n, dtype="i8") + int64_t res, value + + with cython.overflowcheck(True): + for i in range(n): + value = values[i] + + if value == NPY_NAT: + res = NPY_NAT + else: + remainder = value % unit + if remainder == 0: + res = value + else: + res = value + (unit - remainder) + + result[i] = res + + return result + + +cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit): + return _ceil_int64(values - unit // 2, unit) + + +cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit): + return _floor_int64(values + unit // 2, unit) + + +def round_nsint64(values: np.ndarray, mode: RoundTo, nanos) -> np.ndarray: + """ + Applies rounding mode at given frequency + + Parameters + ---------- + values : np.ndarray[int64_t]` + mode : instance of `RoundTo` enumeration + nanos : np.int64 + Freq to round to, expressed in nanoseconds + + Returns + ------- + np.ndarray[int64_t] + """ + cdef: + int64_t unit = nanos + + if mode == RoundTo.MINUS_INFTY: + return _floor_int64(values, unit) + elif mode == RoundTo.PLUS_INFTY: + return _ceil_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_MINUS_INFTY: + return _rounddown_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_PLUS_INFTY: + return _roundup_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_EVEN: + # for odd unit there is no need of a tie break + if unit % 2: + return _rounddown_int64(values, unit) + quotient, remainder = np.divmod(values, unit) + mask = np.logical_or( + remainder > (unit // 2), + np.logical_and(remainder == (unit // 2), quotient % 2) + ) + quotient[mask] += 1 + return quotient * unit + + # if/elif above should catch all rounding modes defined in enum 'RoundTo': + # if flow of control arrives here, it is a bug + raise ValueError("round_nsint64 called with an unrecognized rounding mode") diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 25991cfbdb7a7..748a4c27e64ad 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -47,6 +47,7 @@ from pandas._libs.tslibs.util cimport ( is_integer_object, is_timedelta64_object, ) +from pandas._libs.tslibs.fields import RoundTo, round_nsint64 # ---------------------------------------------------------------------- # Constants @@ -1297,14 +1298,18 @@ class Timedelta(_Timedelta): object_state = self.value, return (Timedelta, object_state) - def _round(self, freq, rounder): + @cython.cdivision(True) + def _round(self, freq, mode): cdef: - int64_t result, unit + int64_t result, unit, remainder + ndarray[int64_t] arr from pandas._libs.tslibs.offsets import to_offset unit = to_offset(freq).nanos - result = unit * rounder(self.value / float(unit)) - return Timedelta(result, unit='ns') + + arr = np.array([self.value], dtype="i8") + result = round_nsint64(arr, mode, unit)[0] + return Timedelta(result, unit="ns") def round(self, freq): """ @@ -1323,7 +1328,7 @@ class Timedelta(_Timedelta): ------ ValueError if the freq cannot be converted """ - return self._round(freq, np.round) + return self._round(freq, RoundTo.NEAREST_HALF_EVEN) def floor(self, freq): """ @@ -1334,7 +1339,7 @@ class Timedelta(_Timedelta): freq : str Frequency string indicating the flooring resolution. """ - return self._round(freq, np.floor) + return self._round(freq, RoundTo.MINUS_INFTY) def ceil(self, freq): """ @@ -1345,7 +1350,7 @@ class Timedelta(_Timedelta): freq : str Frequency string indicating the ceiling resolution. """ - return self._round(freq, np.ceil) + return self._round(freq, RoundTo.PLUS_INFTY) # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 1df589073a6ba..5f6b614ac3d81 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -56,7 +56,12 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, ) -from pandas._libs.tslibs.fields import get_date_name_field, get_start_end_field +from pandas._libs.tslibs.fields import ( + RoundTo, + get_date_name_field, + get_start_end_field, + round_nsint64, +) from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( @@ -110,152 +115,6 @@ cdef inline object create_timestamp_from_ts(int64_t value, return ts_base -class RoundTo: - """ - enumeration defining the available rounding modes - - Attributes - ---------- - MINUS_INFTY - round towards -∞, or floor [2]_ - PLUS_INFTY - round towards +∞, or ceil [3]_ - NEAREST_HALF_EVEN - round to nearest, tie-break half to even [6]_ - NEAREST_HALF_MINUS_INFTY - round to nearest, tie-break half to -∞ [5]_ - NEAREST_HALF_PLUS_INFTY - round to nearest, tie-break half to +∞ [4]_ - - - References - ---------- - .. [1] "Rounding - Wikipedia" - https://en.wikipedia.org/wiki/Rounding - .. [2] "Rounding down" - https://en.wikipedia.org/wiki/Rounding#Rounding_down - .. [3] "Rounding up" - https://en.wikipedia.org/wiki/Rounding#Rounding_up - .. [4] "Round half up" - https://en.wikipedia.org/wiki/Rounding#Round_half_up - .. [5] "Round half down" - https://en.wikipedia.org/wiki/Rounding#Round_half_down - .. [6] "Round half to even" - https://en.wikipedia.org/wiki/Rounding#Round_half_to_even - """ - @property - def MINUS_INFTY(self) -> int: - return 0 - - @property - def PLUS_INFTY(self) -> int: - return 1 - - @property - def NEAREST_HALF_EVEN(self) -> int: - return 2 - - @property - def NEAREST_HALF_PLUS_INFTY(self) -> int: - return 3 - - @property - def NEAREST_HALF_MINUS_INFTY(self) -> int: - return 4 - - -cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit): - cdef: - Py_ssize_t i, n = len(values) - ndarray[int64_t] result = np.empty(n, dtype="i8") - int64_t res, value - - with cython.overflowcheck(True): - for i in range(n): - value = values[i] - if value == NPY_NAT: - res = NPY_NAT - else: - res = value - value % unit - result[i] = res - - return result - - -cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit): - cdef: - Py_ssize_t i, n = len(values) - ndarray[int64_t] result = np.empty(n, dtype="i8") - int64_t res, value - - with cython.overflowcheck(True): - for i in range(n): - value = values[i] - - if value == NPY_NAT: - res = NPY_NAT - else: - remainder = value % unit - if remainder == 0: - res = value - else: - res = value + (unit - remainder) - - result[i] = res - - return result - - -cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit): - return _ceil_int64(values - unit//2, unit) - - -cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit): - return _floor_int64(values + unit//2, unit) - - -def round_nsint64(values: np.ndarray, mode: RoundTo, freq) -> np.ndarray: - """ - Applies rounding mode at given frequency - - Parameters - ---------- - values : np.ndarray[int64_t]` - mode : instance of `RoundTo` enumeration - freq : str, obj - - Returns - ------- - np.ndarray[int64_t] - """ - - unit = to_offset(freq).nanos - - if mode == RoundTo.MINUS_INFTY: - return _floor_int64(values, unit) - elif mode == RoundTo.PLUS_INFTY: - return _ceil_int64(values, unit) - elif mode == RoundTo.NEAREST_HALF_MINUS_INFTY: - return _rounddown_int64(values, unit) - elif mode == RoundTo.NEAREST_HALF_PLUS_INFTY: - return _roundup_int64(values, unit) - elif mode == RoundTo.NEAREST_HALF_EVEN: - # for odd unit there is no need of a tie break - if unit % 2: - return _rounddown_int64(values, unit) - quotient, remainder = np.divmod(values, unit) - mask = np.logical_or( - remainder > (unit // 2), - np.logical_and(remainder == (unit // 2), quotient % 2) - ) - quotient[mask] += 1 - return quotient * unit - - # if/elif above should catch all rounding modes defined in enum 'RoundTo': - # if flow of control arrives here, it is a bug - raise ValueError("round_nsint64 called with an unrecognized rounding mode") - - # ---------------------------------------------------------------------- def integer_op_not_supported(obj): @@ -1181,6 +1040,9 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): + cdef: + int64_t nanos = to_offset(freq).nanos + if self.tz is not None: value = self.tz_localize(None).value else: @@ -1189,7 +1051,7 @@ class Timestamp(_Timestamp): value = np.array([value], dtype=np.int64) # Will only ever contain 1 element for timestamp - r = round_nsint64(value, mode, freq)[0] + r = round_nsint64(value, mode, nanos)[0] result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1032559766ada..5ee7a5715d6af 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -32,11 +32,8 @@ iNaT, to_offset, ) -from pandas._libs.tslibs.timestamps import ( - RoundTo, - integer_op_not_supported, - round_nsint64, -) +from pandas._libs.tslibs.fields import RoundTo, round_nsint64 +from pandas._libs.tslibs.timestamps import integer_op_not_supported from pandas._typing import DatetimeLikeScalar, Dtype, DtypeObj, NpDtype from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError, NullFrequencyError, PerformanceWarning @@ -1606,7 +1603,8 @@ def _round(self, freq, mode, ambiguous, nonexistent): ) values = self.view("i8") - result = round_nsint64(values, mode, freq) + nanos = to_offset(freq).nanos + result = round_nsint64(values, mode, nanos) result = self._maybe_mask_results(result, fill_value=iNaT) return self._simple_new(result, dtype=self.dtype) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 89b45b7266daa..906ed038c4840 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -357,6 +357,68 @@ def test_round_invalid(self): with pytest.raises(ValueError, match=msg): t1.round(freq) + def test_round_implementation_bounds(self): + # See also: analogous test for Timestamp + # GH#38964 + result = Timedelta.min.ceil("s") + expected = Timedelta.min + Timedelta(seconds=1) - Timedelta(145224193) + assert result == expected + + result = Timedelta.max.floor("s") + expected = Timedelta.max - Timedelta(854775807) + assert result == expected + + with pytest.raises(OverflowError, match="value too large"): + Timedelta.min.floor("s") + + # the second message here shows up in windows builds + msg = "|".join( + ["Python int too large to convert to C long", "int too big to convert"] + ) + with pytest.raises(OverflowError, match=msg): + Timedelta.max.ceil("s") + + @pytest.mark.parametrize("n", range(100)) + @pytest.mark.parametrize( + "method", [Timedelta.round, Timedelta.floor, Timedelta.ceil] + ) + def test_round_sanity(self, method, n, request): + iinfo = np.iinfo(np.int64) + val = np.random.randint(iinfo.min + 1, iinfo.max, dtype=np.int64) + td = Timedelta(val) + + assert method(td, "ns") == td + + res = method(td, "us") + nanos = 1000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "ms") + nanos = 1_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "s") + nanos = 1_000_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "min") + nanos = 60 * 1_000_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "h") + nanos = 60 * 60 * 1_000_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + + res = method(td, "D") + nanos = 24 * 60 * 60 * 1_000_000_000 + assert np.abs((res - td).value) < nanos + assert res.value % nanos == 0 + def test_contains(self): # Checking for any NaT-like objects # GH 13603 diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index b619c9c9632e3..3f3a3af658969 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -95,6 +95,8 @@ def test_replace_gh5319(self): expected = ser.ffill() result = ser.replace(np.nan) tm.assert_series_equal(result, expected) + + def test_replace_datetime64(self): # GH 5797 ser = pd.Series(pd.date_range("20130101", periods=5)) expected = ser.copy() @@ -104,6 +106,7 @@ def test_replace_gh5319(self): result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101")) tm.assert_series_equal(result, expected) + def test_replace_nat_with_tz(self): # GH 11792: Test with replacing NaT in a list with tz data ts = pd.Timestamp("2015/01/01", tz="UTC") s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])