From 39c66e06ad3f8ed931e794ccc68733cc84bddb0d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Jun 2020 13:21:40 -0700 Subject: [PATCH 1/2] PERF: Timestamp.normalize --- asv_bench/benchmarks/tslibs/timestamp.py | 23 ++++++++++++---- pandas/_libs/tslibs/conversion.pxd | 1 + pandas/_libs/tslibs/conversion.pyx | 10 +++---- pandas/_libs/tslibs/timestamps.pyx | 34 +++++++++++++----------- 4 files changed, 43 insertions(+), 25 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index 3ef9b814dd79e..ce99f467bdd5f 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -1,17 +1,31 @@ +from datetime import datetime, timezone, timedelta import datetime +from dateutil.tz import gettz, tzlocal, tzutc import dateutil import numpy as np import pytz from pandas import Timestamp +# One case for each type of tzinfo object that has its own code path +# in tzconversion code. +_tzs = [ + None, + pytz.timezone("Europe/Amsterdam"), + gettz("US/Central"), + pytz.UTC, + tzutc(), + timezone(timedelta(minutes=60)), + tzlocal(), +] + class TimestampConstruction: def setup(self): self.npdatetime64 = np.datetime64("2020-01-01 00:00:00") - self.dttime_unaware = datetime.datetime(2020, 1, 1, 0, 0, 0) - self.dttime_aware = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC) + self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0) + self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC) self.ts = Timestamp("2020-01-01 00:00:00") def time_parse_iso8601_no_tz(self): @@ -49,7 +63,6 @@ def time_from_pd_timestamp(self): class TimestampProperties: - _tzs = [None, pytz.timezone("Europe/Amsterdam"), pytz.UTC, dateutil.tz.tzutc()] _freqs = [None, "B"] params = [_tzs, _freqs] param_names = ["tz", "freq"] @@ -110,7 +123,7 @@ def time_month_name(self, tz, freq): class TimestampOps: - params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()] + params = _tzs param_names = ["tz"] def setup(self, tz): @@ -148,7 +161,7 @@ def time_ceil(self, tz): class TimestampAcrossDst: def setup(self): - dt = datetime.datetime(2016, 3, 27, 1) + dt = datetime(2016, 3, 27, 1) self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo self.ts2 = Timestamp(dt) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 94f6d1d9020d2..623d9f14d646b 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -26,3 +26,4 @@ cpdef datetime localize_pydatetime(datetime dt, object tz) cdef int64_t cast_from_unit(object ts, str unit) except? -1 cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz) +cdef int64_t normalize_i8_stamp(int64_t local_val) nogil diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 884715f482cad..5da873a0d1c02 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -795,14 +795,14 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t result[i] = NPY_NAT continue local_val = stamps[i] - result[i] = _normalize_i8_stamp(local_val) + result[i] = normalize_i8_stamp(local_val) elif is_tzlocal(tz): for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT continue local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) - result[i] = _normalize_i8_stamp(local_val) + result[i] = normalize_i8_stamp(local_val) else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) @@ -815,7 +815,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t result[i] = NPY_NAT continue local_val = stamps[i] + delta - result[i] = _normalize_i8_stamp(local_val) + result[i] = normalize_i8_stamp(local_val) else: pos = trans.searchsorted(stamps, side='right') - 1 for i in range(n): @@ -823,13 +823,13 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t result[i] = NPY_NAT continue local_val = stamps[i] + deltas[pos[i]] - result[i] = _normalize_i8_stamp(local_val) + result[i] = normalize_i8_stamp(local_val) return result.base # `.base` to access underlying ndarray @cython.cdivision -cdef inline int64_t _normalize_i8_stamp(int64_t local_val) nogil: +cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil: """ Round the localized nanosecond timestamp down to the previous midnight. diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 15fcfc742ecf3..7e36aefe95fa8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -40,7 +40,7 @@ from pandas._libs.tslibs.conversion cimport ( _TSObject, convert_to_tsobject, convert_datetime_to_tsobject, - normalize_i8_timestamps, + normalize_i8_stamp, ) from pandas._libs.tslibs.fields import get_start_end_field, get_date_name_field from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT @@ -447,9 +447,11 @@ cdef class _Timestamp(ABCTimestamp): """Convert UTC i8 value to local i8 value if tz exists""" cdef: int64_t val + tzinfo own_tz = self.tzinfo + val = self.value - if self.tz is not None and not is_utc(self.tz): - val = tz_convert_single(self.value, UTC, self.tz) + if own_tz is not None and not is_utc(own_tz): + val = tz_convert_single(self.value, UTC, own_tz) return val cdef bint _get_start_end_field(self, str field): @@ -619,6 +621,20 @@ cdef class _Timestamp(ABCTimestamp): """ return ccalendar.get_days_in_month(self.year, self.month) + # ----------------------------------------------------------------- + # Transformation Methods + + def normalize(self) -> "Timestamp": + """ + Normalize Timestamp to midnight, preserving tz information. + """ + cdef: + local_val = self._maybe_convert_value_to_local() + int64_t normalized + + normalized = normalize_i8_stamp(local_val) + return Timestamp(normalized).tz_localize(self.tzinfo) + # ----------------------------------------------------------------- # Rendering Methods @@ -1446,18 +1462,6 @@ default 'raise' self.nanosecond / 3600.0 / 1e+9 ) / 24.0) - def normalize(self): - """ - Normalize Timestamp to midnight, preserving tz information. - """ - cdef: - ndarray[int64_t] normalized - tzinfo own_tz = self.tzinfo # could be None - - normalized = normalize_i8_timestamps( - np.array([self.value], dtype="i8"), tz=own_tz) - return Timestamp(normalized[0]).tz_localize(own_tz) - # Aliases Timestamp.weekofyear = Timestamp.week From 3bb62bc5a9574561544859651fc621b1b5c32982 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Jun 2020 14:29:55 -0700 Subject: [PATCH 2/2] lint fixup --- asv_bench/benchmarks/tslibs/timestamp.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index ce99f467bdd5f..4d816b72454ce 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -1,8 +1,6 @@ -from datetime import datetime, timezone, timedelta -import datetime +from datetime import datetime, timedelta, timezone from dateutil.tz import gettz, tzlocal, tzutc -import dateutil import numpy as np import pytz