From 93d80a810f22ca9b9d8b52764094957d9733c7de Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 23 Nov 2022 13:47:29 +0000 Subject: [PATCH 1/5] factor out parse_pydatetime --- pandas/_libs/tslib.pyx | 38 ++++-------- pandas/_libs/tslibs/conversion.pxd | 16 ++++++ pandas/_libs/tslibs/conversion.pyx | 92 ++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 28 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e01de6b70470e..096d45b4c4ea1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -51,7 +51,9 @@ from pandas._libs.tslibs.conversion cimport ( cast_from_unit, convert_datetime_to_tsobject, get_datetime64_nanos, + parse_pydatetime, precision_from_unit, + validate_tzout, ) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, @@ -59,7 +61,6 @@ from pandas._libs.tslibs.nattype cimport ( c_nat_strings as nat_strings, ) from pandas._libs.tslibs.timestamps cimport _Timestamp -from pandas._libs.tslibs.timezones cimport tz_compare from pandas._libs.tslibs import ( Resolution, @@ -525,35 +526,16 @@ cpdef array_to_datetime( seen_datetime = True if val.tzinfo is not None: found_tz = True - if utc_convert: - _ts = convert_datetime_to_tsobject(val, None) - _ts.ensure_reso(NPY_FR_ns) - iresult[i] = _ts.value - elif found_naive: - raise ValueError('Tz-aware datetime.datetime ' - 'cannot be converted to ' - 'datetime64 unless utc=True') - elif tz_out is not None and not tz_compare(tz_out, val.tzinfo): - raise ValueError('Tz-aware datetime.datetime ' - 'cannot be converted to ' - 'datetime64 unless utc=True') - else: - found_tz = True - tz_out = val.tzinfo - _ts = convert_datetime_to_tsobject(val, None) - _ts.ensure_reso(NPY_FR_ns) - iresult[i] = _ts.value - else: found_naive = True - if found_tz and not utc_convert: - raise ValueError('Cannot mix tz-aware with ' - 'tz-naive values') - if isinstance(val, _Timestamp): - iresult[i] = val.as_unit("ns").value - else: - iresult[i] = pydatetime_to_dt64(val, &dts) - check_dts_bounds(&dts) + tz_out = validate_tzout( + val.tzinfo, + tz_out, + found_naive, + found_tz, + utc_convert, + ) + result[i] = parse_pydatetime(val, &dts, utc_convert) elif PyDate_Check(val): seen_datetime = True diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index c285b248f7a5b..78b944f588c6d 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -12,6 +12,8 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, npy_datetimestruct, ) +from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport tz_compare cdef class _TSObject: @@ -40,3 +42,17 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1 cpdef (int64_t, int) precision_from_unit(str unit) cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso) + +cdef tzinfo validate_tzout( + tzinfo tz_in, + tzinfo tz_out, + bint found_naive, + bint found_tz, + bint utc_convert, +) + +cdef int64_t parse_pydatetime( + object val, + npy_datetimestruct *dts, + bint utc_convert, +) except -1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 923dfa3c54d26..55cce9949b582 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -41,6 +41,7 @@ from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, + pydatetime_to_dt64, pydatetime_to_dtstruct, string_to_dts, ) @@ -642,3 +643,94 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): elif isinstance(dt, ABCTimestamp): return dt.tz_localize(tz) return _localize_pydatetime(dt, tz) + + +cdef tzinfo validate_tzout( + tzinfo tz_in, + tzinfo tz_out, + bint found_naive, + bint found_tz, + bint utc_convert, +): + """ + Validate that ``tz_in`` can be converted/localized to ``tz_out``. + + Parameters + ---------- + tz_in : tzinfo + Timezone info of element being processed. + tz_out : tzinfo + Timezone info of output. + found_naive : bool + Whether a timezone-naive element has been found so far. + found_tz : bool + Whether a timezone-aware element has been found so far. + utc_convert : bool + Whether to convert/localize to UTC. + + Returns + ------- + tz_info + Timezone info of output. + + Raises + ------ + ValueError + If ``tz_in`` can't be converted/localized to ``tz_out``. + """ + if tz_in is not None: + if utc_convert: + pass + elif found_naive: + raise ValueError('Tz-aware datetime.datetime ' + 'cannot be converted to ' + 'datetime64 unless utc=True') + elif tz_out is not None and not tz_compare(tz_out, tz_in): + raise ValueError('Tz-aware datetime.datetime ' + 'cannot be converted to ' + 'datetime64 unless utc=True') + else: + tz_out = tz_in + else: + if found_tz and not utc_convert: + raise ValueError('Cannot mix tz-aware with ' + 'tz-naive values') + return tz_out + +cdef int64_t parse_pydatetime( + object val, + npy_datetimestruct *dts, + bint utc_convert, +) except -1: + """ + Convert pydatetime to datetime64. + + Parameters + ---------- + val + Element being processed. + dts : *npy_datetimestruct + Needed to use in pydatetime_to_dt64, which writes to it. + utc_convert : bool + Whether to convert/localize to UTC. + + Raises + ------ + OutOfBoundsDatetime + """ + if val.tzinfo is not None: + if utc_convert: + _ts = convert_datetime_to_tsobject(val, None) + _ts.ensure_reso(NPY_FR_ns) + result = _ts.value + else: + _ts = convert_datetime_to_tsobject(val, None) + _ts.ensure_reso(NPY_FR_ns) + result = _ts.value + else: + if isinstance(val, _Timestamp): + result = val.as_unit("ns").value + else: + result = pydatetime_to_dt64(val, dts) + check_dts_bounds(dts) + return result From 657699ee12ea1e6446e6bfc45f0fa60c41126e45 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 23 Nov 2022 16:33:33 +0000 Subject: [PATCH 2/5] fix segfault --- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 78b944f588c6d..15f32bbbbf83b 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -55,4 +55,4 @@ cdef int64_t parse_pydatetime( object val, npy_datetimestruct *dts, bint utc_convert, -) except -1 +) except * diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 55cce9949b582..c50ac3f43ea70 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -66,6 +66,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) +from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.tzconversion cimport ( Localizer, tz_localize_to_utc_single, @@ -701,7 +702,7 @@ cdef int64_t parse_pydatetime( object val, npy_datetimestruct *dts, bint utc_convert, -) except -1: +) except *: """ Convert pydatetime to datetime64. From 51db700e2d53c75e78058d98be9ec95cb144d87d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 23 Nov 2022 18:37:09 +0000 Subject: [PATCH 3/5] :truck: rename validate_tzout to convert_timezone --- pandas/_libs/tslib.pyx | 4 ++-- pandas/_libs/tslibs/conversion.pxd | 4 ++-- pandas/_libs/tslibs/conversion.pyx | 9 +++++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 096d45b4c4ea1..5679ea9306c72 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -50,10 +50,10 @@ from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, convert_datetime_to_tsobject, + convert_timezone, get_datetime64_nanos, parse_pydatetime, precision_from_unit, - validate_tzout, ) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, @@ -528,7 +528,7 @@ cpdef array_to_datetime( found_tz = True else: found_naive = True - tz_out = validate_tzout( + tz_out = convert_timezone( val.tzinfo, tz_out, found_naive, diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 15f32bbbbf83b..d437069270ca2 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -43,7 +43,7 @@ cpdef (int64_t, int) precision_from_unit(str unit) cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso) -cdef tzinfo validate_tzout( +cdef tzinfo convert_timezone( tzinfo tz_in, tzinfo tz_out, bint found_naive, @@ -55,4 +55,4 @@ cdef int64_t parse_pydatetime( object val, npy_datetimestruct *dts, bint utc_convert, -) except * +) except? -1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c50ac3f43ea70..ea60a9f5765fe 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -646,7 +646,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): return _localize_pydatetime(dt, tz) -cdef tzinfo validate_tzout( +cdef tzinfo convert_timezone( tzinfo tz_in, tzinfo tz_out, bint found_naive, @@ -698,11 +698,12 @@ cdef tzinfo validate_tzout( 'tz-naive values') return tz_out + cdef int64_t parse_pydatetime( object val, npy_datetimestruct *dts, bint utc_convert, -) except *: +) except? -1: """ Convert pydatetime to datetime64. @@ -719,6 +720,10 @@ cdef int64_t parse_pydatetime( ------ OutOfBoundsDatetime """ + cdef: + _TSObject _ts + int64_t result + if val.tzinfo is not None: if utc_convert: _ts = convert_datetime_to_tsobject(val, None) From ebb8b3905bdf96f9dca3e4d76321d63a7fd355d8 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 23 Nov 2022 19:14:54 +0000 Subject: [PATCH 4/5] change ensure_reso return value --- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index d437069270ca2..edd69c7d6f744 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -24,7 +24,7 @@ cdef class _TSObject: bint fold NPY_DATETIMEUNIT creso - cdef void ensure_reso(self, NPY_DATETIMEUNIT creso) + cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ea60a9f5765fe..fa1031487655f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -210,9 +210,10 @@ cdef class _TSObject: self.fold = 0 self.creso = NPY_FR_ns # default value - cdef void ensure_reso(self, NPY_DATETIMEUNIT creso): + cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso): if self.creso != creso: self.value = convert_reso(self.value, self.creso, creso, False) + return self.value cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, From b99ceb500c6fc09acd3902d71a41bb27717e7581 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 23 Nov 2022 19:37:27 +0000 Subject: [PATCH 5/5] add except? -1 to ensure_reso --- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index edd69c7d6f744..dfb8b2009f0ec 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -24,7 +24,7 @@ cdef class _TSObject: bint fold NPY_DATETIMEUNIT creso - cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) + cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1 cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fa1031487655f..d0d6dc3f42d85 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -210,7 +210,7 @@ cdef class _TSObject: self.fold = 0 self.creso = NPY_FR_ns # default value - cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso): + cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso) except? -1: if self.creso != creso: self.value = convert_reso(self.value, self.creso, creso, False) return self.value