@@ -43,7 +43,7 @@ def _guess_datetime_format_for_array(arr, **kwargs):
43
43
return _guess_datetime_format (arr [non_nan_elements [0 ]], ** kwargs )
44
44
45
45
46
- def should_cache (arg , check_count : int , unique_share : float ):
46
+ def should_cache (arg , unique_share = 0.7 , check_count = None ):
47
47
"""
48
48
Decides whether to do caching.
49
49
@@ -53,23 +53,34 @@ def should_cache(arg, check_count: int, unique_share: float):
53
53
Parameters
54
54
----------
55
55
arg: listlike, tuple, 1-d array, Series
56
- check_count: int
57
- 0 <= check_count <= len(arg)
58
- unique_share: float
56
+ unique_share: float or None
59
57
0 < unique_share < 1
58
+ check_count: int or None
59
+ 0 <= check_count <= len(arg)
60
60
61
61
Returns
62
62
-------
63
63
do_caching: bool
64
64
"""
65
- assert 0 <= check_count <= len (arg ), ('check_count must be in next bounds:'
66
- ' [0; len(arg)]' )
67
- assert 0 < unique_share < 1 , 'unique_share must be in next bounds: (0; 1)'
65
+ do_caching = True
68
66
69
- if check_count == 0 :
70
- return False
67
+ # default realization
68
+ if check_count is None :
69
+ # in this case, the gain from caching is negligible
70
+ if len (arg ) <= 50 :
71
+ return False
71
72
72
- do_caching = True
73
+ if len (arg ) <= 5000 :
74
+ check_count = int (len (arg ) * 0.1 )
75
+ else :
76
+ check_count = 500
77
+ else :
78
+ assert 0 <= check_count <= len (arg ), \
79
+ 'check_count must be in next bounds: [0; len(arg)]'
80
+ assert 0 < unique_share < 1 , \
81
+ 'unique_share must be in next bounds: (0; 1)'
82
+ if check_count == 0 :
83
+ return False
73
84
74
85
unique_elements = unique (arg [:check_count ])
75
86
if len (unique_elements ) > check_count * unique_share :
@@ -102,7 +113,7 @@ def _maybe_cache(arg, format, cache, convert_listlike):
102
113
# Perform a quicker unique check
103
114
from pandas import Index
104
115
105
- if not should_cache (arg , int ( len ( arg ) * 0.1 ), 0.7 ):
116
+ if not should_cache (arg ):
106
117
return cache_array
107
118
108
119
unique_dates = Index (arg ).unique ()
0 commit comments