From 1c65b9624de57aeabed1c9f027e8976945afa8e4 Mon Sep 17 00:00:00 2001 From: agijsberts Date: Sat, 15 Mar 2014 21:47:04 +0100 Subject: [PATCH 1/4] Speed up DatetimeConverter by using Matplotlib's epoch2num when possible. --- pandas/tseries/converter.py | 5 ++- pandas/tseries/tests/test_converter.py | 42 ++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index d059d229ef22e..ad0bac3be5aef 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -144,7 +144,10 @@ def _dt_to_float_ordinal(dt): preserving hours, minutes, seconds and microseconds. Return value is a :func:`float`. """ - base = dates.date2num(dt) + if isinstance(dt, np.ndarray) and com.is_datetime64_ns_dtype(dt): + base = dates.epoch2num(dt.astype(np.int64) / 1.0E9) + else: + base = dates.date2num(dt) return base diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index 29137f9cb3e50..902b9cb549e32 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -5,8 +5,11 @@ import nose import numpy as np +from numpy.testing import assert_almost_equal as np_assert_almost_equal +from pandas import Timestamp from pandas.compat import u import pandas.util.testing as tm +from pandas.tseries.offsets import Second, Milli, Micro try: import pandas.tseries.converter as converter @@ -46,9 +49,48 @@ def test_conversion(self): rs = self.dtc.convert('2012-1-1', None, None) self.assertEqual(rs, xp) + rs = self.dtc.convert(Timestamp('2012-1-1'), None, None) + self.assertEqual(rs, xp) + + def test_conversion_float(self): + decimals = 9 + + rs = self.dtc.convert(Timestamp('2012-1-1 01:02:03', tz='UTC'), None, None) + xp = converter.dates.date2num(Timestamp('2012-1-1 01:02:03', tz='UTC')) + np_assert_almost_equal(rs, xp, decimals) + + rs = self.dtc.convert(Timestamp('2012-1-1 09:02:03', tz='Asia/Hong_Kong'), None, None) + 
np_assert_almost_equal(rs, xp, decimals) + + rs = self.dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None) + np_assert_almost_equal(rs, xp, decimals) + def test_time_formatter(self): self.tc(90000) + def test_dateindex_conversion(self): + decimals = 9 + + for freq in ('B', 'L', 'S'): + dateindex = tm.makeDateIndex(k = 10, freq = freq) + rs = self.dtc.convert(dateindex, None, None) + xp = converter.dates.date2num(dateindex) + np_assert_almost_equal(rs, xp, decimals) + + def test_resolution(self): + def _assert_less(ts1, ts2): + val1 = self.dtc.convert(ts1, None, None) + val2 = self.dtc.convert(ts2, None, None) + if not val1 < val2: + raise AssertionError('{0} is not less than {1}.'.format(val1, val2)) + + # Matplotlib's time representation using floats cannot distinguish intervals smaller + # than ~10 microsecond in the common range of years. + ts = Timestamp('2012-1-1') + _assert_less(ts, ts + Second()) + _assert_less(ts, ts + Milli()) + _assert_less(ts, ts + Micro(50)) + if __name__ == '__main__': import nose From dc95c4cd3e582b328e81d3de7fd7a3934767ea7d Mon Sep 17 00:00:00 2001 From: agijsberts Date: Sun, 16 Mar 2014 14:33:19 +0100 Subject: [PATCH 2/4] Added vbench performance benchmark for speed up of DatetimeConverter. 
 --- vb_suite/timeseries.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py index c43d2fb76dbdb..93821c3be3c2c 100644 --- a/vb_suite/timeseries.py +++ b/vb_suite/timeseries.py @@ -269,3 +269,15 @@ def date_range(start=None, end=None, periods=None, freq=None): dataframe_resample_max_numpy = \ Benchmark("df.resample('1s', how=np.max)", setup) + +#---------------------------------------------------------------------- +# DatetimeConverter + +setup = common_setup + """ +from pandas.tseries.converter import DatetimeConverter +""" + +datetimeindex_converter = \ + Benchmark('DatetimeConverter.convert(rng, None, None)', + setup, start_date=datetime(2013, 1, 1)) + From 35f1dea0a8eb6cf3ec897cae1c386ca353ce29db Mon Sep 17 00:00:00 2001 From: agijsberts Date: Mon, 17 Mar 2014 16:11:12 +0100 Subject: [PATCH 3/4] Fixes to DatetimeConverter speed up following PR comments and corresponding note in release notes. --- doc/source/release.rst | 2 ++ pandas/tseries/converter.py | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 2583b47d9b3bf..6f9aa1c01fc37 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -159,6 +159,8 @@ Improvements to existing features - ``StataWriter`` and ``DataFrame.to_stata`` accept time stamp and data labels (:issue:`6545`) - offset/freq info now in Timestamp __repr__ (:issue:`4553`) - Support passing ``encoding`` with xlwt (:issue:`3710`) +- Performance improvement when converting ``DatetimeIndex`` to floating ordinals + using ``DatetimeConverter`` (:issue:`6636`) .. 
_release.bug_fixes-0.14.0: diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index ad0bac3be5aef..5b425fb4c90c4 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -16,6 +16,7 @@ import pandas.core.common as com from pandas.core.index import Index +from pandas import Series from pandas.tseries.index import date_range import pandas.tseries.tools as tools import pandas.tseries.frequencies as frequencies @@ -144,8 +145,8 @@ def _dt_to_float_ordinal(dt): preserving hours, minutes, seconds and microseconds. Return value is a :func:`float`. """ - if isinstance(dt, np.ndarray) and com.is_datetime64_ns_dtype(dt): - base = dates.epoch2num(dt.astype(np.int64) / 1.0E9) + if isinstance(dt, (np.ndarray, Series)) and com.is_datetime64_ns_dtype(dt): + base = dates.epoch2num(dt.asi8 / 1.0E9) else: base = dates.date2num(dt) return base From 64d9e9207bfea97121d44c41abe42e9fd0726f6c Mon Sep 17 00:00:00 2001 From: agijsberts Date: Mon, 17 Mar 2014 17:08:35 +0100 Subject: [PATCH 4/4] Fix failed tests when importing Series from pandas. --- pandas/tseries/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 5b425fb4c90c4..b9939976fded8 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -16,7 +16,7 @@ import pandas.core.common as com from pandas.core.index import Index -from pandas import Series +from pandas.core.series import Series from pandas.tseries.index import date_range import pandas.tseries.tools as tools import pandas.tseries.frequencies as frequencies