From 696d9812ca3c3eeb1c1c8e9d80486b040e58ffa7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Mar 2022 17:04:45 -0700 Subject: [PATCH 1/5] avoid numpy calls in tzconversion --- pandas/_libs/tslibs/tzconversion.pyx | 35 ++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 705c4cef5c05d..fc30740389daa 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -339,27 +339,38 @@ cdef inline str _render_tstamp(int64_t val): cdef ndarray[int64_t] _get_dst_hours( # vals only needed here to potential render an exception message - ndarray[int64_t] vals, + const int64_t[:] vals, ndarray[int64_t] result_a, ndarray[int64_t] result_b, ): cdef: - Py_ssize_t n = vals.shape[0] + Py_ssize_t i, n = vals.shape[0] ndarray[uint8_t, cast=True] both_nat, both_eq ndarray[int64_t] delta, dst_hours - ndarray trans_idx, grp, a_idx, b_idx, one_diff + ndarray grp, a_idx, b_idx, one_diff list trans_grp + ndarray[intp_t] switch_idxs, trans_idx + intp_t switch_idx + int64_t left, right dst_hours = np.empty(n, dtype=np.int64) dst_hours[:] = NPY_NAT - # Get the ambiguous hours (given the above, these are the hours - # where result_a != result_b and neither of them are NAT) - both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) - both_eq = result_a == result_b - trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) + mismatch = np.zeros(n, dtype=bool) + + for i in range(n): + left = result_a[i] + right = result_b[i] + + # Get the ambiguous hours (given the above, these are the hours + # where result_a != result_b and neither of them are NAT) + if left != right and left != NPY_NAT and right != NPY_NAT: + mismatch[i] = 1 + + trans_idx = mismatch.nonzero()[0] + if trans_idx.size == 1: - stamp = _render_tstamp(vals[trans_idx]) + stamp = _render_tstamp(vals[trans_idx[0]]) raise pytz.AmbiguousTimeError( f"Cannot 
infer dst time from {stamp} as there " "are no repeated times" @@ -385,14 +396,14 @@ cdef ndarray[int64_t] _get_dst_hours( # Find the index for the switch and pull from a for dst and b # for standard - switch_idx = (delta <= 0).nonzero()[0] - if switch_idx.size > 1: + switch_idxs = (delta <= 0).nonzero()[0] + if switch_idxs.size > 1: raise pytz.AmbiguousTimeError( f"There are {switch_idx.size} dst switches when " "there should only be 1." ) - switch_idx = switch_idx[0] + 1 # TODO: declare type for switch_idx + switch_idx = switch_idxs[0] + 1 # Pull the only index and adjust a_idx = grp[:switch_idx] b_idx = grp[switch_idx:] From 7c2f37bd8f6f3c3c01ff410d2be90b1699be4d44 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Mar 2022 17:21:22 -0700 Subject: [PATCH 2/5] improve annotations --- pandas/_libs/tslibs/tzconversion.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index fc30740389daa..d3d5bfce6be4b 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -15,7 +15,6 @@ from cpython.datetime cimport ( import_datetime() -from dateutil.tz import tzutc import numpy as np import pytz @@ -345,11 +344,10 @@ cdef ndarray[int64_t] _get_dst_hours( ): cdef: Py_ssize_t i, n = vals.shape[0] - ndarray[uint8_t, cast=True] both_nat, both_eq + ndarray[uint8_t, cast=True] mismatch ndarray[int64_t] delta, dst_hours - ndarray grp, a_idx, b_idx, one_diff + ndarray[intp_t] switch_idxs, trans_idx, grp, a_idx, b_idx, one_diff list trans_grp - ndarray[intp_t] switch_idxs, trans_idx intp_t switch_idx int64_t left, right From fb1b97016980292408cb8489183b232e8aa62950 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Mar 2022 18:20:47 -0700 Subject: [PATCH 3/5] comment --- pandas/_libs/tslibs/tzconversion.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 
d3d5bfce6be4b..bf35299ffd285 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -193,6 +193,8 @@ timedelta-like} result_b[:] = NPY_NAT for i in range(n): + # This loop resembles the "Find the two best possibilities" block + # in pytz's DstTZInfo.localize method. val = vals[i] if val == NPY_NAT: continue From b3bd0a25e79d419dc33aa1d7a7c93ca2657ef13b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Mar 2022 19:08:10 -0700 Subject: [PATCH 4/5] fix build failure --- pandas/_libs/tslibs/tzconversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index bf35299ffd285..797aa8aa15520 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -399,7 +399,7 @@ cdef ndarray[int64_t] _get_dst_hours( switch_idxs = (delta <= 0).nonzero()[0] if switch_idxs.size > 1: raise pytz.AmbiguousTimeError( - f"There are {switch_idx.size} dst switches when " + f"There are {switch_idxs.size} dst switches when " "there should only be 1." ) From 03a0d6a879900461aa44465e0794666718e6df1c Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Mar 2022 19:09:20 -0700 Subject: [PATCH 5/5] remove unused import --- pandas/_libs/tslibs/tzconversion.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 797aa8aa15520..bc57cb8aaed83 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -39,7 +39,6 @@ from pandas._libs.tslibs.np_datetime cimport ( ) from pandas._libs.tslibs.timezones cimport ( get_dst_info, - get_utcoffset, is_fixed_offset, is_tzlocal, is_utc,