From 1ee540eb99e4aa55d6ead864f87cd1413b9c6ca7 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Jun 2020 13:13:39 -0700 Subject: [PATCH 1/4] PERF: type tz kwarg in create_timestamp_from_ts --- pandas/_libs/tslib.pyx | 17 ++++++++++------- pandas/_libs/tslibs/timestamps.pxd | 4 ++-- pandas/_libs/tslibs/timestamps.pyx | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 44693d60486a9..f494e74bde55f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -8,6 +8,7 @@ from cpython.datetime cimport ( datetime, time, timedelta, + tzinfo, ) # import datetime C API PyDateTime_IMPORT @@ -77,9 +78,9 @@ from pandas._libs.missing cimport checknull_with_nat_and_na cdef inline object create_datetime_from_ts( int64_t value, npy_datetimestruct dts, - object tz, + tzinfo tz, object freq, - bint fold + bint fold, ): """ Convenience routine to construct a datetime.datetime from its parts. @@ -92,7 +93,7 @@ cdef inline object create_datetime_from_ts( cdef inline object create_date_from_ts( int64_t value, npy_datetimestruct dts, - object tz, + tzinfo tz, object freq, bint fold ): @@ -106,7 +107,7 @@ cdef inline object create_date_from_ts( cdef inline object create_time_from_ts( int64_t value, npy_datetimestruct dts, - object tz, + tzinfo tz, object freq, bint fold ): @@ -120,7 +121,7 @@ cdef inline object create_time_from_ts( @cython.boundscheck(False) def ints_to_pydatetime( const int64_t[:] arr, - object tz=None, + tzinfo tz=None, object freq=None, bint fold=False, str box="datetime" @@ -162,7 +163,7 @@ def ints_to_pydatetime( str typ int64_t value, delta, local_value ndarray[object] result = np.empty(n, dtype=object) - object (*func_create)(int64_t, npy_datetimestruct, object, object, bint) + object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint) if box == "date": assert (tz is None), "tz should be None when converting to date" @@ -178,7 +179,9 @@ def ints_to_pydatetime( elif box 
== "datetime": func_create = create_datetime_from_ts else: - raise ValueError("box must be one of 'datetime', 'date', 'time' or 'timestamp'") + raise ValueError( + "box must be one of 'datetime', 'date', 'time' or 'timestamp'" + ) if is_utc(tz) or tz is None: for i in range(n): diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 27b659980e526..307b6dfc90715 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -1,4 +1,4 @@ -from cpython.datetime cimport datetime +from cpython.datetime cimport datetime, tzinfo from numpy cimport int64_t @@ -8,7 +8,7 @@ from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct cdef object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, - object tz, object freq, bint fold) + tzinfo tz, object freq, bint fold) cdef class _Timestamp(ABCTimestamp): diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 15fcfc742ecf3..355dc0dbc5820 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -69,7 +69,7 @@ _no_input = object() cdef inline object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, - object tz, object freq, bint fold): + tzinfo tz, object freq, bint fold): """ convenience routine to construct a Timestamp from its parts """ cdef _Timestamp ts_base ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, From 3bcb1614e26685d129df09443ba186c67c164ec8 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 14:36:59 -0700 Subject: [PATCH 2/4] asv: implement asvs for ints_to_pydatetime --- asv_bench/benchmarks/tslibs/tslib.py | 54 ++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 asv_bench/benchmarks/tslibs/tslib.py diff --git a/asv_bench/benchmarks/tslibs/tslib.py b/asv_bench/benchmarks/tslibs/tslib.py new file mode 100644 index 0000000000000..fce53ee0e7907 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/tslib.py @@ 
-0,0 +1,54 @@ +""" +ipython analogue: + +tr = TimeIntsToPydatetime() +mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],)) +df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"]) +for box in tr.params[0]: + for size in tr.params[1]: + for tz in tr.params[2]: + tr.setup(box, size, tz) + key = (box, size, str(tz)) + print(key) + val = %timeit -o tr.time_ints_to_pydatetime(box, size, tz) + df.loc[key] = (val.average, val.stdev) +""" +from datetime import timedelta, timezone + +from dateutil.tz import gettz, tzlocal +import numpy as np +import pytz + +from pandas._libs.tslib import ints_to_pydatetime + + +_tzs = [ + None, + timezone.utc, + timezone(timedelta(minutes=60)), + pytz.timezone("US/Pacific"), + gettz("Asia/Tokyo"), + tzlocal(), +] +_sizes = [0, 1, 100, 10**4, 10 ** 6] + + +class TimeIntsToPydatetime: + params = ( + ["time", "date", "datetime", "timestamp"], + _sizes, + _tzs, + ) + param_names = ["box", "size", "tz"] + # TODO: fold? freq? + + def setup(self, box, size, tz): + arr = np.random.randint(0, 10, size=size, dtype="i8") + self.i8data = arr + + def time_ints_to_pydatetime(self, box, size, tz): + if box == "date": + # ints_to_pydatetime does not allow non-None tz with date; + # this will mean doing some duplicate benchmarks + tz = None + ints_to_pydatetime(self.i8data, tz, box=box) From a063a4b78efae268bb8f20fc6869933cd6e2e5a3 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 14:42:30 -0700 Subject: [PATCH 3/4] lint fixup --- asv_bench/benchmarks/tslibs/tslib.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/tslibs/tslib.py b/asv_bench/benchmarks/tslibs/tslib.py index fce53ee0e7907..5874f5610f567 100644 --- a/asv_bench/benchmarks/tslibs/tslib.py +++ b/asv_bench/benchmarks/tslibs/tslib.py @@ -2,7 +2,9 @@ ipython analogue: tr = TimeIntsToPydatetime() -mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],)) +mi = 
pd.MultiIndex.from_product( + tr.params[:-1] + ([str(x) for x in tr.params[-1]],) +) df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"]) for box in tr.params[0]: for size in tr.params[1]: From 6a4e2bb96f282540ea1c36f434c5e497480751a4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 15:28:26 -0700 Subject: [PATCH 4/4] blackify --- asv_bench/benchmarks/tslibs/tslib.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/tslib.py b/asv_bench/benchmarks/tslibs/tslib.py index 5874f5610f567..eacf5a5731dc2 100644 --- a/asv_bench/benchmarks/tslibs/tslib.py +++ b/asv_bench/benchmarks/tslibs/tslib.py @@ -23,7 +23,6 @@ from pandas._libs.tslib import ints_to_pydatetime - _tzs = [ None, timezone.utc, @@ -32,7 +31,7 @@ gettz("Asia/Tokyo"), tzlocal(), ] -_sizes = [0, 1, 100, 10**4, 10 ** 6] +_sizes = [0, 1, 100, 10 ** 4, 10 ** 6] class TimeIntsToPydatetime: