From 7b1e794fc0d14f779785a555f84f5f08c346fb04 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Feb 2023 10:22:09 -0800 Subject: [PATCH 1/2] Remove also-unraisable raising --- pandas/_libs/tslib.pyx | 4 --- pandas/_libs/tslibs/conversion.pyx | 53 +++++++---------------------- pandas/_libs/tslibs/np_datetime.pyx | 2 ++ 3 files changed, 15 insertions(+), 44 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 19dd7aabe6b8e..d828ea424c5a0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -56,7 +56,6 @@ from pandas._libs.tslibs.conversion cimport ( convert_timezone, get_datetime64_nanos, parse_pydatetime, - precision_from_unit, ) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, @@ -258,7 +257,6 @@ def array_with_unit_to_datetime( """ cdef: Py_ssize_t i, n=len(values) - int64_t mult bint is_ignore = errors == "ignore" bint is_coerce = errors == "coerce" bint is_raise = errors == "raise" @@ -275,8 +273,6 @@ def array_with_unit_to_datetime( ) return result, tz - mult, _ = precision_from_unit(unit) - result = np.empty(n, dtype="M8[ns]") iresult = result.view("i8") diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 03a53b1b451e9..9bb5e2d0e0908 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -37,6 +37,7 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_FR_us, check_dts_bounds, convert_reso, + get_conversion_factor, get_datetime64_unit, get_datetime64_value, get_implementation_bounds, @@ -83,9 +84,9 @@ TD64NS_DTYPE = np.dtype("m8[ns]") # Unit Conversion Helpers cdef int64_t cast_from_unit( - object ts, - str unit, - NPY_DATETIMEUNIT out_reso=NPY_FR_ns + object ts, + str unit, + NPY_DATETIMEUNIT out_reso=NPY_FR_ns ) except? -1: """ Return a casting of the unit represented to nanoseconds @@ -104,12 +105,6 @@ cdef int64_t cast_from_unit( int64_t m int p - m, p = precision_from_unit(unit, out_reso) - - # just give me the unit back - if ts is None: - return m - if unit in ["Y", "M"]: if is_float_object(ts) and not ts.is_integer(): # GH#47267 it is clear that 2 "M" corresponds to 1970-02-01, @@ -126,6 +121,8 @@ cdef int64_t cast_from_unit( dt64obj = np.datetime64(ts, unit) return get_datetime64_nanos(dt64obj, out_reso) + m, p = precision_from_unit(unit, out_reso) + # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int try: @@ -148,8 +145,8 @@ cdef int64_t cast_from_unit( cpdef inline (int64_t, int) precision_from_unit( - str unit, - NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns, + str unit, + NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns, ): """ Return a casting of the unit represented to nanoseconds + the precision @@ -162,38 +159,14 @@ cpdef inline (int64_t, int) precision_from_unit( """ cdef: int64_t m - int64_t multiplier int p NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) - multiplier = periods_per_second(out_reso) - - if reso == NPY_DATETIMEUNIT.NPY_FR_Y: - # each 400 years we have 97 leap years, for an average of 97/400=.2425 - # extra days each year. We get 31556952 by writing - # 3600*24*365.2425=31556952 - m = multiplier * 31556952 - elif reso == NPY_DATETIMEUNIT.NPY_FR_M: - # 2629746 comes from dividing the "Y" case by 12. - m = multiplier * 2629746 - elif reso == NPY_DATETIMEUNIT.NPY_FR_W: - m = multiplier * 3600 * 24 * 7 - elif reso == NPY_DATETIMEUNIT.NPY_FR_D: - m = multiplier * 3600 * 24 - elif reso == NPY_DATETIMEUNIT.NPY_FR_h: - m = multiplier * 3600 - elif reso == NPY_DATETIMEUNIT.NPY_FR_m: - m = multiplier * 60 - elif reso == NPY_DATETIMEUNIT.NPY_FR_s: - m = multiplier - elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: - m = multiplier // 1_000 - elif reso == NPY_DATETIMEUNIT.NPY_FR_us: - m = multiplier // 1_000_000 - elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: - m = multiplier // 1_000_000_000 - else: - raise ValueError(f"cannot cast unit {unit}") + if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + reso = NPY_DATETIMEUNIT.NPY_FR_ns + + m = get_conversion_factor(reso, out_reso) + p = log10(m) # number of digits in 'm' minus 1 return m, p diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index aa3411385595b..d9aac87384952 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -571,6 +571,8 @@ cdef int64_t get_conversion_factor( return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit) elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs: return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit) + else: + raise ValueError("Converting from M or Y units is not supported.") cdef int64_t convert_reso( From e81de329e1139919786854766b98b9632905997a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Feb 2023 19:31:25 -0800 Subject: [PATCH 2/2] Restore Y/M cases --- pandas/_libs/tslibs/conversion.pyx | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 9bb5e2d0e0908..b2c0ec1c1ffc6 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -159,13 +159,24 @@ cpdef inline (int64_t, int) precision_from_unit( """ cdef: int64_t m + int64_t multiplier int p NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit) if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: reso = NPY_DATETIMEUNIT.NPY_FR_ns - - m = get_conversion_factor(reso, out_reso) + if reso == NPY_DATETIMEUNIT.NPY_FR_Y: + # each 400 years we have 97 leap years, for an average of 97/400=.2425 + # extra days each year. We get 31556952 by writing + # 3600*24*365.2425=31556952 + multiplier = periods_per_second(out_reso) + m = multiplier * 31556952 + elif reso == NPY_DATETIMEUNIT.NPY_FR_M: + # 2629746 comes from dividing the "Y" case by 12. + multiplier = periods_per_second(out_reso) + m = multiplier * 2629746 + else: + m = get_conversion_factor(reso, out_reso) p = log10(m) # number of digits in 'm' minus 1 return m, p