Skip to content

Commit 82a20b8

Browse files
committed
Add more cache conditions
1 parent 169e3e1 commit 82a20b8

File tree

3 files changed

+54
-8
lines changed

3 files changed

+54
-8
lines changed

asv_bench/benchmarks/timeseries.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,8 @@ def setup(self):
356356

357357
self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
358358
self.s2 = self.s.str.replace(':\\S+$', '')
359+
self.numeric_data = Series([range(100000)])
360+
self.datetime_data = [dt.datetime(2010, 1, 1)] * 100000
359361

360362
def time_format_YYYYMMDD(self):
361363
to_datetime(self.stringsD, format='%Y%m%d')
@@ -381,6 +383,12 @@ def time_format_exact(self):
381383
def time_format_no_exact(self):
382384
to_datetime(self.s, format='%d%b%y', exact=False)
383385

386+
def time_cache_numeric_data(self):
387+
to_datetime(self.numeric_data)
388+
389+
def time_cache_datetime_data(self):
390+
to_datetime(self.datetime_data)
391+
384392

385393
class Offsets(object):
386394
goal_time = 0.2

pandas/core/tools/datetimes.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
is_float,
2222
is_list_like,
2323
is_scalar,
24-
is_numeric_dtype)
24+
is_numeric_dtype,
25+
is_string_dtype)
2526
from pandas.core.dtypes.generic import (
2627
ABCIndexClass, ABCSeries,
2728
ABCDataFrame)
@@ -373,14 +374,19 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
373374
arg = arg + offset
374375

375376
convert_cache = None
376-
if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
377+
if cache and is_list_like(arg):
378+
# Create a cache only if there are at least 10**5 values and the user
379+
# passes in datestrings
380+
min_cache_threshold = 10**5
381+
if len(arg) >= min_cache_threshold and is_string_dtype(arg):
377382
# unique currently cannot determine dates that are out of bounds
378-
# use the cache only if the data is a string and there are more than 10**5 values
379-
unique_dates = algorithms.unique(arg)
380-
if len(unique_dates) != len(arg):
381-
from pandas import Series
382-
cache_data = _convert_listlike(unique_dates, True, format)
383-
convert_cache = Series(cache_data, index=unique_dates)
383+
# recursion errors with datetime
384+
unique_dates = algorithms.unique(arg)
385+
# Essentially they need to all be the same value
386+
if len(unique_dates) == 1:
387+
from pandas import Series
388+
cache_data = _convert_listlike(unique_dates, True, format)
389+
convert_cache = Series(cache_data, index=unique_dates)
384390

385391
if isinstance(arg, tslib.Timestamp):
386392
result = arg

pandas/tests/indexes/datetimes/test_tools.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,38 @@ def test_datetime_invalid_datatype(self):
371371
pd.to_datetime(bool)
372372
with pytest.raises(TypeError):
373373
pd.to_datetime(pd.to_datetime)
374+
375+
@pytest.mark.parametrize("utc", [True, None])
376+
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
377+
@pytest.mark.parametrize("box", [True, False])
378+
@pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index])
379+
def test_to_datetime_cache(self, utc, format, box, constructor):
380+
date = '20130101 00:00:00'
381+
test_dates = [date] * 10**5
382+
data = constructor(test_dates)
383+
result = pd.to_datetime(data, utc=utc, format=format, box=box)
384+
expected = pd.to_datetime(data, utc=utc, format=format, box=box,
385+
cache=False)
386+
if box:
387+
tm.assert_index_equal(result, expected)
388+
else:
389+
tm.assert_numpy_array_equal(result, expected)
390+
391+
@pytest.mark.parametrize("utc", [True, None])
392+
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
393+
def test_to_datetime_cache_series(self, utc, format):
394+
date = '20130101 00:00:00'
395+
test_dates = [date] * 10**5
396+
data = pd.Series(test_dates)
397+
result = pd.to_datetime(data, utc=utc, format=format, cache=True)
398+
expected = pd.to_datetime(data, utc=utc, format=format)
399+
tm.assert_series_equal(result, expected)
400+
401+
def test_to_datetime_cache_scalar(self):
402+
date = '20130101 00:00:00'
403+
result = pd.to_datetime(date, cache=True)
404+
expected = pd.Timestamp('20130101 00:00:00')
405+
assert result == expected
374406

375407

376408
class TestToDatetimeUnit(object):

0 commit comments

Comments
 (0)