Patch summary (free text before the first "diff --git" header):

  * Adds an asv benchmark class, TimeTZConvert, that times tz_convert and
    tz_localize_to_utc on int64 arrays for each non-None timezone in _tzs.
  * In _tz_convert_dst, keeps the `typ` returned by get_dst_info and branches
    on it: when typ is not "pytz" or "dateutil", deltas[0] is applied to every
    non-NaT value and no searchsorted call is made; otherwise the bulk
    searchsorted lookup is kept.  The sign of the adjustment is now chosen per
    element from `to_utc` instead of negating `deltas` up front.

NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch; verify context-line whitespace
against the target files before applying.

diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py
new file mode 100644
index 0000000000000..2a1f559bdf6d4
--- /dev/null
+++ b/asv_bench/benchmarks/tslibs/tz_convert.py
@@ -0,0 +1,30 @@
+import numpy as np
+from pytz import UTC
+
+from pandas._libs.tslibs.tzconversion import tz_convert, tz_localize_to_utc
+
+from .tslib import _sizes, _tzs
+
+
+class TimeTZConvert:
+    params = (
+        _sizes,
+        [x for x in _tzs if x is not None],
+    )
+    param_names = ["size", "tz"]
+
+    def setup(self, size, tz):
+        arr = np.random.randint(0, 10, size=size, dtype="i8")
+        self.i8data = arr
+
+    def time_tz_convert_from_utc(self, size, tz):
+        # effectively:
+        # dti = DatetimeIndex(self.i8data, tz=tz)
+        # dti.tz_localize(None)
+        tz_convert(self.i8data, UTC, tz)
+
+    def time_tz_localize_to_utc(self, size, tz):
+        # effectively:
+        # dti = DatetimeIndex(self.i8data)
+        # dti.tz_localize(tz, ambiguous="NaT", nonexistent="NaT")
+        tz_localize_to_utc(self.i8data, tz, ambiguous="NaT", nonexistent="NaT")
diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx
index d1d6bc40ef288..9bd935940dc7b 100644
--- a/pandas/_libs/tslibs/tzconversion.pyx
+++ b/pandas/_libs/tslibs/tzconversion.pyx
@@ -551,29 +551,48 @@ cdef int64_t[:] _tz_convert_dst(
         int64_t[:] result = np.empty(n, dtype=np.int64)
         ndarray[int64_t] trans
         int64_t[:] deltas
-        int64_t v
+        int64_t v, delta
+        str typ
 
     # tz is assumed _not_ to be tzlocal; that should go
     # through _tz_convert_tzlocal_utc
 
-    trans, deltas, _ = get_dst_info(tz)
-    if not to_utc:
-        # We add `offset` below instead of subtracting it
-        deltas = -1 * np.array(deltas, dtype='i8')
+    trans, deltas, typ = get_dst_info(tz)
 
-    # Previously, this search was done pointwise to try and benefit
-    # from getting to skip searches for iNaTs. However, it seems call
-    # overhead dominates the search time so doing it once in bulk
-    # is substantially faster (GH#24603)
-    pos = trans.searchsorted(values, side='right') - 1
+    if typ not in ["pytz", "dateutil"]:
+        # Not pytz/dateutil (e.g. FixedOffset), we know len(deltas) == 1
+        delta = deltas[0]
 
-    for i in range(n):
-        v = values[i]
-        if v == NPY_NAT:
-            result[i] = v
-        else:
-            if pos[i] < 0:
-                raise ValueError('First time before start of DST info')
-            result[i] = v - deltas[pos[i]]
+        for i in range(n):
+            v = values[i]
+            if v == NPY_NAT:
+                result[i] = v
+            else:
+                if to_utc:
+                    result[i] = v - delta
+                else:
+                    result[i] = v + delta
+
+    else:
+        # Previously, this search was done pointwise to try and benefit
+        # from getting to skip searches for iNaTs. However, it seems call
+        # overhead dominates the search time so doing it once in bulk
+        # is substantially faster (GH#24603)
+        pos = trans.searchsorted(values, side="right") - 1
+
+        for i in range(n):
+            v = values[i]
+            if v == NPY_NAT:
+                result[i] = v
+            else:
+                if pos[i] < 0:
+                    # TODO: How is this reached? Should we be checking for
+                    # it elsewhere?
+                    raise ValueError("First time before start of DST info")
+
+                if to_utc:
+                    result[i] = v - deltas[pos[i]]
+                else:
+                    result[i] = v + deltas[pos[i]]
 
     return result