Skip to content

Commit d90ca45

Browse files
authored
PERF: tz_localize(None) from dst, asvs (#35106)
* PERF: avoid copy in tz_convert dst cases * ASVs * asv fixup
1 parent 7970069 commit d90ca45

File tree

2 files changed

+67
-18
lines changed

2 files changed

+67
-18
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import numpy as np
2+
from pytz import UTC
3+
4+
from pandas._libs.tslibs.tzconversion import tz_convert, tz_localize_to_utc
5+
6+
from .tslib import _sizes, _tzs
7+
8+
9+
class TimeTZConvert:
    """
    asv benchmarks for the int64-level timezone conversion helpers.

    Each benchmark runs once per (size, tz) combination listed in
    ``params``.  Entries of ``_tzs`` that are ``None`` are left out, so
    every run is given an actual timezone object.
    """

    param_names = ["size", "tz"]
    params = (
        _sizes,
        [zone for zone in _tzs if zone is not None],
    )

    def setup(self, size, tz):
        """Create the int64 input array shared by the timing methods."""
        # ``size`` is forwarded to numpy unchanged; values fall in [0, 10).
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_tz_convert_from_utc(self, size, tz):
        """Time ``tz_convert`` going from UTC to ``tz``."""
        # Roughly the work behind:
        #   dti = DatetimeIndex(self.i8data, tz=tz)
        #   dti.tz_localize(None)
        tz_convert(self.i8data, UTC, tz)

    def time_tz_localize_to_utc(self, size, tz):
        """Time ``tz_localize_to_utc`` with NaT for ambiguous/nonexistent."""
        # Roughly the work behind:
        #   dti = DatetimeIndex(self.i8data)
        #   dti.tz_localize(tz, ambiguous="NaT", nonexistent="NaT")
        tz_localize_to_utc(self.i8data, tz, ambiguous="NaT", nonexistent="NaT")

pandas/_libs/tslibs/tzconversion.pyx

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -552,29 +552,48 @@ cdef int64_t[:] _tz_convert_dst(
552552
int64_t[:] result = np.empty(n, dtype=np.int64)
553553
ndarray[int64_t] trans
554554
int64_t[:] deltas
555-
int64_t v
555+
int64_t v, delta
556+
str typ
556557

557558
# tz is assumed _not_ to be tzlocal; that should go
558559
# through _tz_convert_tzlocal_utc
559560

560-
trans, deltas, _ = get_dst_info(tz)
561-
if not to_utc:
562-
# We add `offset` below instead of subtracting it
563-
deltas = -1 * np.array(deltas, dtype='i8')
561+
trans, deltas, typ = get_dst_info(tz)
564562

565-
# Previously, this search was done pointwise to try and benefit
566-
# from getting to skip searches for iNaTs. However, it seems call
567-
# overhead dominates the search time so doing it once in bulk
568-
# is substantially faster (GH#24603)
569-
pos = trans.searchsorted(values, side='right') - 1
563+
if typ not in ["pytz", "dateutil"]:
564+
# FixedOffset, we know len(deltas) == 1
565+
delta = deltas[0]
570566

571-
for i in range(n):
572-
v = values[i]
573-
if v == NPY_NAT:
574-
result[i] = v
575-
else:
576-
if pos[i] < 0:
577-
raise ValueError('First time before start of DST info')
578-
result[i] = v - deltas[pos[i]]
567+
for i in range(n):
568+
v = values[i]
569+
if v == NPY_NAT:
570+
result[i] = v
571+
else:
572+
if to_utc:
573+
result[i] = v - delta
574+
else:
575+
result[i] = v + delta
576+
577+
else:
578+
# Previously, this search was done pointwise to try and benefit
579+
# from getting to skip searches for iNaTs. However, it seems call
580+
# overhead dominates the search time so doing it once in bulk
581+
# is substantially faster (GH#24603)
582+
pos = trans.searchsorted(values, side="right") - 1
583+
584+
for i in range(n):
585+
v = values[i]
586+
if v == NPY_NAT:
587+
result[i] = v
588+
else:
589+
if pos[i] < 0:
590+
# TODO: How is this reached? Should we be checking for
591+
# it elsewhere?
592+
raise ValueError("First time before start of DST info")
593+
594+
if to_utc:
595+
result[i] = v - deltas[pos[i]]
596+
else:
597+
result[i] = v + deltas[pos[i]]
579598

580599
return result

0 commit comments

Comments
 (0)