From 4526d3c83d7a3c17241312ea648f104be5eff5aa Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 28 Aug 2021 13:22:06 +0530 Subject: [PATCH 1/2] PERF: to_datetime with uint --- asv_bench/benchmarks/inference.py | 10 +++++++++- pandas/_libs/tslib.pyx | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 0aa924dabd469..4cbaa184791b8 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -115,19 +115,27 @@ def time_maybe_convert_objects(self): class ToDatetimeFromIntsFloats: def setup(self): self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64") + self.ts_sec_uint = Series(range(1521080307, 1521685107), dtype="uint64") self.ts_sec_float = self.ts_sec.astype("float64") self.ts_nanosec = 1_000_000 * self.ts_sec + self.ts_nanosec_uint = 1_000_000 * self.ts_sec_uint self.ts_nanosec_float = self.ts_nanosec.astype("float64") - # speed of int64 and float64 paths should be comparable + # speed of int64, uint64 and float64 paths should be comparable def time_nanosec_int64(self): to_datetime(self.ts_nanosec, unit="ns") + def time_nanosec_uint64(self): + to_datetime(self.ts_nanosec_uint, unit="ns") + def time_nanosec_float64(self): to_datetime(self.ts_nanosec_float, unit="ns") + def time_sec_uint64(self): + to_datetime(self.ts_sec_uint, unit="s") + def time_sec_int64(self): to_datetime(self.ts_sec, unit="s") diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 6b1c0f851f8e7..6feb9ec768655 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -248,7 +248,7 @@ def array_with_unit_to_datetime( # if we have nulls that are not type-compat # then need to iterate - if values.dtype.kind == "i" or values.dtype.kind == "f": + if values.dtype.kind in ["i", "f", "u"]: iresult = values.astype("i8", copy=False) # fill missing values by comparing to NPY_NAT mask = iresult == NPY_NAT @@ -263,7 +263,7 @@ def array_with_unit_to_datetime( ): raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - if values.dtype.kind == "i": + if values.dtype.kind in ["i", "u"]: result = (iresult * m).astype("M8[ns]") elif values.dtype.kind == "f": From 8678e88976d3876b67b361f33dda5d72078ac95f Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 29 Aug 2021 17:55:11 +0530 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fc488504f1fdf..ad1b070c23010 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -242,7 +242,7 @@ Performance improvements - Performance improvement in :meth:`DataFrame.corr` for ``method=pearson`` on data without missing values (:issue:`40956`) - Performance improvement in some :meth:`GroupBy.apply` operations (:issue:`42992`) - Performance improvement in :func:`read_stata` (:issue:`43059`) -- +- Performance improvement in :meth:`to_datetime` with ``uint`` dtypes (:issue:`42606`) .. ---------------------------------------------------------------------------