Skip to content

Commit 169e3e1

Browse files
committed
Move cache down the stack, explore threshold to trigger cache
1 parent 087a020 commit 169e3e1

File tree

1 file changed

+31
-15
lines changed

1 file changed

+31
-15
lines changed

pandas/core/tools/datetimes.py

Lines changed: 31 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -206,16 +206,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
206 206

207 207
def _convert_listlike(arg, box, format, name=None, tz=tz):
208 208

209-
datetime_cache = None
210-
if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
211-
unique_dates = algorithms.unique(arg)
212-
if len(unique_dates) != len(arg):
213-
datetime_cache = Series(pd.to_datetime(unique_dates,
214-
errors=errors, dayfirst=dayfirst,
215-
yearfirst=yearfirst, utc=utc, box=box, format=format,
216-
exact=exact, unit=unit,
217-
infer_datetime_format=infer_datetime_format,
218-
origin=origin, cache=False), index=unique_dates)
209+
219 210
if isinstance(arg, (list, tuple)):
220 211
arg = np.array(arg, dtype='O')
221 212

@@ -381,18 +372,43 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
381 372
arg = np.asarray(arg)
382 373
arg = arg + offset
383 374

375+
convert_cache = None
376+
if cache and is_list_like(arg) and not isinstance(arg, DatetimeIndex):
377+
# unique currently cannot determine dates that are out of bounds
378+
# use the cache only if the data is a string and there are more than 10**5 values
379+
unique_dates = algorithms.unique(arg)
380+
if len(unique_dates) != len(arg):
381+
from pandas import Series
382+
cache_data = _convert_listlike(unique_dates, True, format)
383+
convert_cache = Series(cache_data, index=unique_dates)
384+
384 385
if isinstance(arg, tslib.Timestamp):
385 386
result = arg
386 387
elif isinstance(arg, ABCSeries):
387-
from pandas import Series
388-
values = _convert_listlike(arg._values, True, format)
389-
result = Series(values, index=arg.index, name=arg.name)
388+
if convert_cache is not None:
389+
result = arg.map(convert_cache)
390+
else:
391+
from pandas import Series
392+
values = _convert_listlike(arg._values, True, format)
393+
result = Series(values, index=arg.index, name=arg.name)
390 394
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
391 395
result = _assemble_from_unit_mappings(arg, errors=errors)
392 396
elif isinstance(arg, ABCIndexClass):
393-
result = _convert_listlike(arg, box, format, name=arg.name)
397+
if convert_cache is not None:
398+
from pandas import Series
399+
result = Series(arg).map(convert_cache).values
400+
if box:
401+
result = DatetimeIndex(result, tz=tz, name=arg.name)
402+
else:
403+
result = _convert_listlike(arg, box, format, name=arg.name)
394 404
elif is_list_like(arg):
395-
result = _convert_listlike(arg, box, format)
405+
if convert_cache is not None:
406+
from pandas import Series
407+
result = Series(arg).map(convert_cache).values
408+
if box:
409+
result = DatetimeIndex(result, tz=tz)
410+
else:
411+
result = _convert_listlike(arg, box, format)
396 412
else:
397 413
result = _convert_listlike(np.array([arg]), box, format)[0]
398 414

0 commit comments

Comments
 (0)