Skip to content

Commit a61751c

Browse files
committed
Move caching function outside to_datetime
1 parent 5d194de commit a61751c

File tree

3 files changed

+43
-55
lines changed

3 files changed

+43
-55
lines changed

pandas/core/indexes/datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ def __new__(cls, data=None,
339339
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
340340
is_integer_dtype(data)):
341341
data = tools.to_datetime(data, dayfirst=dayfirst,
342-
yearfirst=yearfirst, cache=False)
342+
yearfirst=yearfirst)
343343

344344
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
345345

pandas/core/tools/datetimes.py

Lines changed: 38 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,35 @@ def _guess_datetime_format_for_array(arr, **kwargs):
3535
if len(non_nan_elements):
3636
return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs)
3737

38+
def _maybe_cache(arg, format, cache, tz, _convert_listlike):
39+
"""Create a cache of unique dates from an array of dates"""
40+
from pandas import Series
41+
cache_array = Series()
42+
if cache:
43+
# Perform a quicker unique check
44+
from pandas import Index
45+
if not Index(arg).is_unique:
46+
unique_dates = algorithms.unique(arg)
47+
cache_dates = _convert_listlike(unique_dates, True, format,
48+
tz=tz)
49+
cache_array = Series(cache_dates, index=unique_dates)
50+
return cache_array
51+
52+
def _convert_and_box_cache(arg, cache_array, box, name=None):
53+
"""Convert array of dates with a cache and box the result"""
54+
from pandas import Series
55+
from pandas.core.indexes.datetimes import DatetimeIndex
56+
result = Series(arg).map(cache_array)
57+
if box:
58+
result = DatetimeIndex(result, name=name)
59+
else:
60+
result = result.values
61+
return result
3862

3963
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
4064
utc=None, box=True, format=None, exact=True,
4165
unit=None, infer_datetime_format=False, origin='unix',
42-
cache=True):
66+
cache=False):
4367
"""
4468
Convert argument to datetime.
4569
@@ -310,51 +334,6 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
310334
except (ValueError, TypeError):
311335
raise e
312336

313-
def _maybe_convert_cache(arg, cache, box, format, name=None, tz=tz):
314-
"""
315-
Try to convert the datetimelike arg using
316-
a cache of converted dates.
317-
318-
Parameters
319-
----------
320-
arg : integer, float, string, datetime, list, tuple, 1-d array, Series
321-
Datetime argument to convert
322-
cache : boolean
323-
If True, try to convert the dates with a cache
324-
If False, short circuit and return None
325-
Flag whether to cache the converted dates
326-
box : boolean
327-
If True, return a DatetimeIndex
328-
if False, return an ndarray of values
329-
tz : String or None
330-
'utc' if UTC=True was passed else None
331-
name : String, default None
332-
DatetimeIndex name
333-
Returns
334-
-------
335-
Series if original argument was a Series
336-
DatetimeIndex if box=True and original argument was not a Series
337-
ndarray if box=False and original argument was not a Series
338-
None if the conversion failed
339-
"""
340-
if cache and is_list_like(arg) and len(arg) >= 1000:
341-
# Perform a quicker unique check
342-
from pandas import Index
343-
if not Index(arg).is_unique:
344-
unique_dates = algorithms.unique(arg)
345-
from pandas import Series
346-
cache_dates = _convert_listlike(unique_dates, True, format,
347-
tz=tz)
348-
convert_cache = Series(cache_dates, index=unique_dates)
349-
result = Series(arg, name=name).map(convert_cache)
350-
if isinstance(arg, Series):
351-
return result
352-
elif box:
353-
return DatetimeIndex(result, name=name)
354-
else:
355-
return result.values
356-
return None
357-
358337
if arg is None:
359338
return None
360339

@@ -419,20 +398,27 @@ def _maybe_convert_cache(arg, cache, box, format, name=None, tz=tz):
419398
if isinstance(arg, tslib.Timestamp):
420399
result = arg
421400
elif isinstance(arg, ABCSeries):
422-
result = _maybe_convert_cache(arg, cache, box, format, name=arg.name)
423-
if result is None:
401+
cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike)
402+
if not cache_array.empty:
403+
result = arg.map(cache_array)
404+
else:
424405
from pandas import Series
425406
values = _convert_listlike(arg._values, True, format)
426407
result = Series(values, index=arg.index, name=arg.name)
427408
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
428409
result = _assemble_from_unit_mappings(arg, errors=errors)
429410
elif isinstance(arg, ABCIndexClass):
430-
result = _maybe_convert_cache(arg, cache, box, format, name=arg.name)
431-
if result is None:
411+
cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike)
412+
if not cache_array.empty:
413+
result = _convert_and_box_cache(arg, cache_array, box,
414+
name=arg.name)
415+
else:
432416
result = _convert_listlike(arg, box, format, name=arg.name)
433417
elif is_list_like(arg):
434-
result = _maybe_convert_cache(arg, cache, box, format)
435-
if result is None:
418+
cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike)
419+
if not cache_array.empty:
420+
result = _convert_and_box_cache(arg, cache_array, box)
421+
else:
436422
result = _convert_listlike(arg, box, format)
437423
else:
438424
result = _convert_listlike(np.array([arg]), box, format)[0]

pandas/tests/indexes/datetimes/test_tools.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,7 +1224,8 @@ def test_parsers(self, cache):
12241224
assert result3 is tslib.NaT
12251225
assert result4 is tslib.NaT
12261226

1227-
def test_parsers_dayfirst_yearfirst(self):
1227+
@pytest.mark.parametrize('cache', [True, False])
1228+
def test_parsers_dayfirst_yearfirst(self, cache):
12281229
# OK
12291230
# 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
12301231
# 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00
@@ -1373,7 +1374,8 @@ def test_parsers_time(self):
13731374
assert isinstance(res, list)
13741375
assert res == expected_arr
13751376

1376-
def test_parsers_timezone_minute_offsets_roundtrip(self):
1377+
@pytest.mark.parametrize('cache', [True, False])
1378+
def test_parsers_timezone_minute_offsets_roundtrip(self, cache):
13771379
# GH11708
13781380
base = to_datetime("2013-01-01 00:00:00", cache=cache)
13791381
dt_strings = [

0 commit comments

Comments
 (0)