From 6e70d01f4aa8cfa5607daa8f3f74b0aad7b8fbe5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 4 Jan 2020 10:03:43 +0300 Subject: [PATCH 01/12] PERF: add shortcut to Timestamp constructor --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/_libs/tslibs/timestamps.pyx | 5 ++++- pandas/tests/indexes/datetimes/test_tools.py | 7 +++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 087265858e850..5e87ef80ba959 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -925,6 +925,7 @@ Performance improvements The improvement is not present if checking if the :class:`Categorical` is less than or less than or equal than the scalar (:issue:`29820`) - Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`) - Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`) +- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 36566b55e74ad..7a8dd62700c86 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -391,7 +391,10 @@ class Timestamp(_Timestamp): # User passed tzinfo instead of tz; avoid silently ignoring tz, tzinfo = tzinfo, None - if isinstance(ts_input, str): + if isinstance(ts_input, Timestamp) and tz is None: + # GH 30543 if pd.Timestamp already passed, return it + return ts_input + elif isinstance(ts_input, str): # User passed a date string to parse. # Check that the user didn't also pass a date attribute kwarg. if any(arg is not None for arg in _date_attributes): diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a5332eaea0432..f6a1cf20ab952 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -2313,3 +2313,10 @@ def test_nullable_integer_to_datetime(): tm.assert_series_equal(res, expected) # Check that ser isn't mutated tm.assert_series_equal(ser, ser_copy) + + +def test_timestamp_constructor_identity(): + # Test for #30543 + expected = pd.Timestamp("2017-01-01T12") + result = pd.Timestamp(expected) + assert result is expected From 9aa0156bfca58f30511186f351d57cbb6babc874 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 4 Jan 2020 10:39:25 +0300 Subject: [PATCH 02/12] CLN: move test to test_constructors.py --- pandas/tests/indexes/datetimes/test_constructors.py | 7 +++++++ pandas/tests/indexes/datetimes/test_tools.py | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index b6013c3939793..68285d41bda70 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -957,3 +957,10 @@ def test_timedelta_constructor_identity(): expected = pd.Timedelta(np.timedelta64(1, "s")) result = pd.Timedelta(expected) assert result is expected + + +def test_timestamp_constructor_identity(): + # Test for #30543 + expected = pd.Timestamp("2017-01-01T12") + result = pd.Timestamp(expected) + assert result is expected diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index f6a1cf20ab952..a5332eaea0432 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -2313,10 +2313,3 @@ def test_nullable_integer_to_datetime(): tm.assert_series_equal(res, expected) # Check that ser isn't mutated tm.assert_series_equal(ser, ser_copy) - - -def test_timestamp_constructor_identity(): - # Test for #30543 - expected = pd.Timestamp("2017-01-01T12") - result = pd.Timestamp(expected) - assert result is expected From 7cebba5bfbed739284b04755d3001f98938aebdf Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 15 Jan 2020 16:25:59 +0300 Subject: [PATCH 03/12] BUG: check that only Timestamp is passed --- pandas/_libs/tslibs/timestamps.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7a8dd62700c86..23cf1da8bf5f8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -379,6 +379,8 @@ class Timestamp(_Timestamp): _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] + _non_ts_attributes = [freq, tz, unit, tzinfo] + _date_attributes + if tzinfo is not None: if not PyTZInfo_Check(tzinfo): # tzinfo must be a datetime.tzinfo object, GH#17690 @@ -391,8 +393,10 @@ class Timestamp(_Timestamp): # User passed tzinfo instead of tz; avoid silently ignoring tz, tzinfo = tzinfo, None - if isinstance(ts_input, Timestamp) and tz is None: - # GH 30543 if pd.Timestamp already passed, return it + # GH 30543 if pd.Timestamp already passed, return it + # check that only ts_input is passed + if (isinstance(ts_input, Timestamp) and not + any(arg is not None for arg in _non_ts_attributes)): return ts_input elif isinstance(ts_input, str): # User passed a date string to parse. From 7aea539c6e508d5b2710f1bbd09ffe756fa08816 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 16 Jan 2020 09:40:50 +0300 Subject: [PATCH 04/12] switch to explicit arg is none checks --- pandas/_libs/tslibs/timestamps.pyx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 23cf1da8bf5f8..ee899ed27605e 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -379,8 +379,6 @@ class Timestamp(_Timestamp): _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] - _non_ts_attributes = [freq, tz, unit, tzinfo] + _date_attributes - if tzinfo is not None: if not PyTZInfo_Check(tzinfo): # tzinfo must be a datetime.tzinfo object, GH#17690 @@ -395,8 +393,12 @@ class Timestamp(_Timestamp): # GH 30543 if pd.Timestamp already passed, return it # check that only ts_input is passed - if (isinstance(ts_input, Timestamp) and not - any(arg is not None for arg in _non_ts_attributes)): + if (isinstance(ts_input, Timestamp) and freq is None and + tz is None and unit is None and year is None and + month is None and day is None and hour is None and + minute is None and second is None and + microsecond is None and nanosecond is None and + tzinfo is None): return ts_input elif isinstance(ts_input, str): # User passed a date string to parse. From d2adb4fd254b6b9fb0179a05516d4ccfd55f8ff1 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 16 Jan 2020 10:20:16 +0300 Subject: [PATCH 05/12] DOC: move whatsnew to version 1.1.0 --- doc/source/whatsnew/v1.0.0.rst | 1 - doc/source/whatsnew/v1.1.0.rst | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5e87ef80ba959..087265858e850 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -925,7 +925,6 @@ Performance improvements The improvement is not present if checking if the :class:`Categorical` is less than or less than or equal than the scalar (:issue:`29820`) - Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`) - Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`) -- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d0cf92b60fe0d..a0e1c964dd365 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -109,7 +109,9 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ + - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) +- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) - - From 3ea3f9133e9132a3b4325e03da486ef51423e6e0 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 18 Jan 2020 21:59:11 +0300 Subject: [PATCH 06/12] TST: add benchmarks for timestamp shortcut --- asv_bench/benchmarks/tslibs/timestamp.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index 8ebb2d8d2f35d..11b0997d4bb07 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -3,10 +3,14 @@ import dateutil import pytz -from pandas import Timestamp +from pandas import Series, Timestamp class TimestampConstruction: + def setup(self): + self.ts = Timestamp("2020-01-01 00:00:00") + self.ts_series = Series(range(10000)).astype(' Date: Sun, 19 Jan 2020 00:50:37 +0300 Subject: [PATCH 07/12] CLN: run black on the benchmark file --- asv_bench/benchmarks/tslibs/timestamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index 11b0997d4bb07..f1eda0d77167b 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -9,7 +9,7 @@ class TimestampConstruction: def setup(self): self.ts = Timestamp("2020-01-01 00:00:00") - self.ts_series = Series(range(10000)).astype(' Date: Mon, 20 Jan 2020 12:19:17 +0300 Subject: [PATCH 08/12] TST: add scalar benchmarks, remove series benchmark --- asv_bench/benchmarks/tslibs/timestamp.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index f1eda0d77167b..ab780ca9ef8cf 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -1,15 +1,18 @@ import datetime import dateutil +import numpy as np import pytz -from pandas import Series, Timestamp +from pandas import Timestamp class TimestampConstruction: def setup(self): + self.npdatetime64 = np.datetime64("2020-01-01 00:00:00") + self.dttime = datetime.datetime(2020, 1, 1, 0, 0, 0) + self.dttime_tzaware = datetime.datetime(2020, 1, 1, 0, 0, 0, pytz.UTC) self.ts = Timestamp("2020-01-01 00:00:00") - self.ts_series = Series(range(10000)).astype(" Date: Mon, 20 Jan 2020 13:16:16 +0300 Subject: [PATCH 09/12] fix numpy tzaware datetime constructor call --- asv_bench/benchmarks/tslibs/timestamp.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index ab780ca9ef8cf..3ef9b814dd79e 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -10,8 +10,8 @@ class TimestampConstruction: def setup(self): self.npdatetime64 = np.datetime64("2020-01-01 00:00:00") - self.dttime = datetime.datetime(2020, 1, 1, 0, 0, 0) - self.dttime_tzaware = datetime.datetime(2020, 1, 1, 0, 0, 0, pytz.UTC) + self.dttime_unaware = datetime.datetime(2020, 1, 1, 0, 0, 0) + self.dttime_aware = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC) self.ts = Timestamp("2020-01-01 00:00:00") def time_parse_iso8601_no_tz(self): @@ -38,11 +38,11 @@ def time_fromtimestamp(self): def time_from_npdatetime64(self): Timestamp(self.npdatetime64) - def time_from_datetime(self): - Timestamp(self.dttime) + def time_from_datetime_unaware(self): + Timestamp(self.dttime_unaware) - def time_from_datetime_tzaware(self): - Timestamp(self.dttime_tzaware) + def time_from_datetime_aware(self): + Timestamp(self.dttime_aware) def time_from_pd_timestamp(self): Timestamp(self.ts) From c80f7484c1b8d9229669285b333f063861e690e1 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 21 Jan 2020 11:21:29 +0300 Subject: [PATCH 10/12] DOC: comment on reason for verbose check --- pandas/_libs/tslibs/timestamps.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ee899ed27605e..4915671aa6512 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -393,6 +393,8 @@ class Timestamp(_Timestamp): # GH 30543 if pd.Timestamp already passed, return it # check that only ts_input is passed + # checking verbosely, because cython doesn't optimize + # list comprehensions (as of cython 0.29.x) if (isinstance(ts_input, Timestamp) and freq is None and tz is None and unit is None and year is None and month is None and day is None and hour is None and From 9065888023837c8221221a1565a634080912ece9 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 24 Jan 2020 11:52:27 +0300 Subject: [PATCH 11/12] TST: remove xfail from test_dti_construction_ambiguous_endpoint --- pandas/tests/indexes/datetimes/test_timezones.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index c785eb67e5184..070949c1af575 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -11,7 +11,6 @@ import pytz from pandas._libs.tslibs import conversion, timezones -from pandas.compat._optional import _get_version import pandas.util._test_decorators as td import pandas as pd @@ -583,15 +582,7 @@ def test_dti_construction_ambiguous_endpoint(self, tz): ["US/Pacific", "shift_forward", "2019-03-10 03:00"], ["dateutil/US/Pacific", "shift_forward", "2019-03-10 03:00"], ["US/Pacific", "shift_backward", "2019-03-10 01:00"], - pytest.param( - "dateutil/US/Pacific", - "shift_backward", - "2019-03-10 01:00", - marks=pytest.mark.xfail( - LooseVersion(_get_version(dateutil)) < LooseVersion("2.7.0"), - reason="GH 31043", - ), - ), + ["dateutil/US/Pacific", "shift_backward", "2019-03-10 01:00"], ["US/Pacific", timedelta(hours=1), "2019-03-10 03:00"], ], ) From ba19e26ab76258e11afecaee6573cf533ea9e834 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 24 Jan 2020 12:24:00 +0300 Subject: [PATCH 12/12] CLN: remove unnecessary LooseVersion import --- pandas/tests/indexes/datetimes/test_timezones.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 070949c1af575..cd8e8c3542cce 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -2,7 +2,6 @@ Tests for DatetimeIndex timezone-related methods """ from datetime import date, datetime, time, timedelta, tzinfo -from distutils.version import LooseVersion import dateutil from dateutil.tz import gettz, tzlocal