Skip to content

PERF: Timestamp.normalize #35068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions asv_bench/benchmarks/tslibs/timestamp.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,29 @@
import datetime
from datetime import datetime, timedelta, timezone

import dateutil
from dateutil.tz import gettz, tzlocal, tzutc
import numpy as np
import pytz

from pandas import Timestamp

# One case for each type of tzinfo object that has its own code path
# in tzconversion code.
# Defined once at module level so the benchmark classes below can all
# parametrize over the same list instead of each keeping its own copy.
_tzs = [
None,  # no timezone
pytz.timezone("Europe/Amsterdam"),  # pytz named zone
gettz("US/Central"),  # dateutil named zone
pytz.UTC,  # pytz UTC
tzutc(),  # dateutil UTC
timezone(timedelta(minutes=60)),  # stdlib fixed offset of +60 minutes
tzlocal(),  # dateutil local zone
]


class TimestampConstruction:
def setup(self):
self.npdatetime64 = np.datetime64("2020-01-01 00:00:00")
self.dttime_unaware = datetime.datetime(2020, 1, 1, 0, 0, 0)
self.dttime_aware = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0)
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
self.ts = Timestamp("2020-01-01 00:00:00")

def time_parse_iso8601_no_tz(self):
Expand Down Expand Up @@ -49,7 +61,6 @@ def time_from_pd_timestamp(self):


class TimestampProperties:
_tzs = [None, pytz.timezone("Europe/Amsterdam"), pytz.UTC, dateutil.tz.tzutc()]
_freqs = [None, "B"]
params = [_tzs, _freqs]
param_names = ["tz", "freq"]
Expand Down Expand Up @@ -110,7 +121,7 @@ def time_weekday_name(self, tz, freq):


class TimestampOps:
params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()]
params = _tzs
param_names = ["tz"]

def setup(self, tz):
Expand Down Expand Up @@ -148,7 +159,7 @@ def time_ceil(self, tz):

class TimestampAcrossDst:
def setup(self):
dt = datetime.datetime(2016, 3, 27, 1)
dt = datetime(2016, 3, 27, 1)
self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo
self.ts2 = Timestamp(dt)

Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ cpdef datetime localize_pydatetime(datetime dt, object tz)
cdef int64_t cast_from_unit(object ts, str unit) except? -1

cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz)
cdef int64_t normalize_i8_stamp(int64_t local_val) nogil
10 changes: 5 additions & 5 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -795,14 +795,14 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
result[i] = NPY_NAT
continue
local_val = stamps[i]
result[i] = _normalize_i8_stamp(local_val)
result[i] = normalize_i8_stamp(local_val)
elif is_tzlocal(tz):
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
continue
local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
result[i] = _normalize_i8_stamp(local_val)
result[i] = normalize_i8_stamp(local_val)
else:
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)
Expand All @@ -815,21 +815,21 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
result[i] = NPY_NAT
continue
local_val = stamps[i] + delta
result[i] = _normalize_i8_stamp(local_val)
result[i] = normalize_i8_stamp(local_val)
else:
pos = trans.searchsorted(stamps, side='right') - 1
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
continue
local_val = stamps[i] + deltas[pos[i]]
result[i] = _normalize_i8_stamp(local_val)
result[i] = normalize_i8_stamp(local_val)

return result.base # `.base` to access underlying ndarray


@cython.cdivision
cdef inline int64_t _normalize_i8_stamp(int64_t local_val) nogil:
cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil:
"""
Round the localized nanosecond timestamp down to the previous midnight.

Expand Down
28 changes: 15 additions & 13 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ from pandas._libs.tslibs.conversion cimport (
_TSObject,
convert_to_tsobject,
convert_datetime_to_tsobject,
normalize_i8_timestamps,
normalize_i8_stamp,
)
from pandas._libs.tslibs.fields import get_start_end_field, get_date_name_field
from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT
Expand Down Expand Up @@ -553,6 +553,20 @@ cdef class _Timestamp(ABCTimestamp):
"""
return ccalendar.get_days_in_month(self.year, self.month)

# -----------------------------------------------------------------
# Transformation Methods

def normalize(self) -> "Timestamp":
    """
    Normalize Timestamp to midnight, preserving tz information.

    The localized integer value of this Timestamp is rounded down to the
    previous midnight by ``normalize_i8_stamp`` and the result is then
    localized back to this Timestamp's own ``tzinfo``.

    Returns
    -------
    Timestamp
    """
    cdef:
        # Declared as int64_t (not left untyped, which would make it a
        # Python object) so it matches the int64_t parameter of
        # normalize_i8_stamp and the typed ``normalized`` below.
        int64_t local_val = self._maybe_convert_value_to_local()
        int64_t normalized

    normalized = normalize_i8_stamp(local_val)
    # tz_localize re-attaches the original tzinfo (which may be None).
    return Timestamp(normalized).tz_localize(self.tzinfo)

# -----------------------------------------------------------------
# Pickle Methods

Expand Down Expand Up @@ -1455,18 +1469,6 @@ default 'raise'
self.nanosecond / 3600.0 / 1e+9
) / 24.0)

def normalize(self):
"""
Normalize Timestamp to midnight, preserving tz information.

The scalar value is wrapped in a one-element int64 array so that the
array routine ``normalize_i8_timestamps`` can be used; the first element
of its result is then localized back to this Timestamp's ``tzinfo``.
"""
cdef:
ndarray[int64_t] normalized
tzinfo own_tz = self.tzinfo # could be None

# normalize_i8_timestamps works on arrays, hence the temporary
# length-1 array built from self.value.
normalized = normalize_i8_timestamps(
np.array([self.value], dtype="i8"), tz=own_tz)
return Timestamp(normalized[0]).tz_localize(own_tz)


# Aliases
Timestamp.weekofyear = Timestamp.week
Expand Down