1
+ """
2
+ The functions benchmarked in this file depend _almost_ exclusively on
3
+ _libs, but not in a way that is easy to formalize.
4
+
5
+ If a PR does not change anything in pandas/_libs/ or pandas/core/tools/, then
6
+ it is likely that these benchmarks will be unaffected.
7
+ """
8
+
1
9
import numpy as np
2
10
3
11
from pandas import (
12
+ NaT ,
4
13
Series ,
14
+ date_range ,
15
+ to_datetime ,
5
16
to_numeric ,
17
+ to_timedelta ,
6
18
)
7
19
8
20
from .pandas_vb_common import (
@@ -69,6 +81,9 @@ def time_downcast(self, dtype, downcast):
69
81
70
82
71
83
class MaybeConvertNumeric :
84
+ # maybe_convert_numeric depends _exclusively_ on _libs, could
85
+ # go in benchmarks/libs.py
86
+
72
87
def setup_cache (self ):
73
88
N = 10 ** 6
74
89
arr = np .repeat ([2 ** 63 ], N ) + np .arange (N ).astype ("uint64" )
@@ -81,4 +96,205 @@ def time_convert(self, data):
81
96
lib .maybe_convert_numeric (data , set (), coerce_numeric = False )
82
97
83
98
99
class MaybeConvertObjects:
    """Benchmark ``lib.maybe_convert_objects`` on integers mixed with ``NaT``."""

    # maybe_convert_objects depends _almost_ exclusively on _libs, but
    # does have some run-time imports from outside of _libs

    def setup(self):
        """Store ``10 ** 5`` consecutive integers, first one swapped for ``NaT``."""
        N = 10 ** 5

        data = list(range(N))
        # A single NaT among plain ints: numpy has to fall back to an
        # object array when it builds the ndarray below.
        data[0] = NaT
        data = np.array(data)
        self.data = data

    def time_maybe_convert_objects(self):
        """Run ``lib.maybe_convert_objects`` over the prepared array."""
        lib.maybe_convert_objects(self.data)
113
+
114
+
115
class ToDatetimeFromIntsFloats:
    """Benchmark ``to_datetime`` on numeric input, int64 versus float64."""

    def setup(self):
        """Build the integer series and their float64 counterparts."""
        self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64")
        self.ts_sec_float = self.ts_sec.astype("float64")

        # NOTE(review): a factor of 1_000_000 scales seconds to microseconds,
        # yet the result is parsed with unit="ns" below -- confirm whether
        # 10 ** 9 was intended. Left unchanged so the benchmark data stays
        # comparable with earlier runs.
        self.ts_nanosec = 1_000_000 * self.ts_sec
        self.ts_nanosec_float = self.ts_nanosec.astype("float64")

    # speed of int64 and float64 paths should be comparable

    def time_nanosec_int64(self):
        """Parse the int64 series with ``unit="ns"``."""
        to_datetime(self.ts_nanosec, unit="ns")

    def time_nanosec_float64(self):
        """Parse the float64 series with ``unit="ns"``."""
        to_datetime(self.ts_nanosec_float, unit="ns")

    def time_sec_int64(self):
        """Parse the int64 series with ``unit="s"``."""
        to_datetime(self.ts_sec, unit="s")

    def time_sec_float64(self):
        """Parse the float64 series with ``unit="s"``."""
        to_datetime(self.ts_sec_float, unit="s")
136
+
137
+
138
class ToDatetimeYYYYMMDD:
    """Benchmark ``to_datetime`` on compact ``%Y%m%d`` date strings."""

    def setup(self):
        """Format 10000 consecutive days from 2000-01-01 as ``%Y%m%d`` strings."""
        rng = date_range(start="1/1/2000", periods=10000, freq="D")
        self.stringsD = Series(rng.strftime("%Y%m%d"))

    def time_format_YYYYMMDD(self):
        """Parse the strings with the explicit ``%Y%m%d`` format."""
        to_datetime(self.stringsD, format="%Y%m%d")
145
+
146
+
147
class ToDatetimeCacheSmallCount:
    """Benchmark ``to_datetime`` with and without ``cache`` on unique dates."""

    # Parameter grid: the ``cache`` flag crossed with the number of strings.
    params = ([True, False], [50, 500, 5000, 100000])
    param_names = ["cache", "count"]

    def setup(self, cache, count):
        """Build ``count`` distinct ``%Y-%m-%d`` strings starting at 1971-01-01."""
        rng = date_range(start="1/1/1971", periods=count)
        self.unique_date_strings = rng.strftime("%Y-%m-%d").tolist()

    def time_unique_date_strings(self, cache, count):
        """Parse the unique strings, forwarding ``cache`` to ``to_datetime``."""
        to_datetime(self.unique_date_strings, cache=cache)
158
+
159
+
160
class ToDatetimeISO8601:
    """Benchmark ``to_datetime`` on ISO 8601-like timestamp strings."""

    def setup(self):
        """Build 20000 hourly timestamps from 2000-01-01 in three string forms."""
        # "60min" expresses the hourly step with an alias that is accepted
        # both before and after the uppercase "H" alias was deprecated
        # (pandas 2.2); the generated strings are the same.
        rng = date_range(start="1/1/2000", periods=20000, freq="60min")
        self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
        self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist()
        # Same "%Y-%m-%d %H:%M:%S" text as self.strings plus a fixed UTC
        # offset, so reuse the formatted list rather than re-formatting
        # every timestamp one at a time.
        self.strings_tz_space = [stamp + " -0800" for stamp in self.strings]

    def time_iso8601(self):
        """Parse the dash-separated strings with no explicit format."""
        to_datetime(self.strings)

    def time_iso8601_nosep(self):
        """Parse the strings without date separators, no explicit format."""
        to_datetime(self.strings_nosep)

    def time_iso8601_format(self):
        """Parse the dash-separated strings with their explicit format."""
        to_datetime(self.strings, format="%Y-%m-%d %H:%M:%S")

    def time_iso8601_format_no_sep(self):
        """Parse the separator-less strings with their explicit format."""
        to_datetime(self.strings_nosep, format="%Y%m%d %H:%M:%S")

    def time_iso8601_tz_spaceformat(self):
        """Parse the strings that carry a space-separated ``-0800`` offset."""
        to_datetime(self.strings_tz_space)
183
+
184
+
185
class ToDatetimeNONISO8601:
    """Benchmark ``to_datetime`` on non-ISO strings that carry UTC offsets."""

    def setup(self):
        """Build one list with a single offset and one with two offsets."""
        N = 10000
        half = N // 2
        ts_string_1 = "March 1, 2018 12:00:00+0400"
        ts_string_2 = "March 1, 2018 12:00:00+0500"
        self.same_offset = [ts_string_1] * N
        # First half uses +0400, second half +0500.
        self.diff_offset = [ts_string_1] * half + [ts_string_2] * half

    def time_same_offset(self):
        """Parse the list in which every string has the same offset."""
        to_datetime(self.same_offset)

    def time_different_offset(self):
        """Parse the list that mixes two different offsets."""
        to_datetime(self.diff_offset)
199
+
200
+
201
class ToDatetimeFormatQuarters:
    """Benchmark ``to_datetime`` on quarter-style strings such as ``2Q2005``."""

    def setup(self):
        """Repeat four quarter spellings 10000 times into one Series."""
        self.s = Series(["2Q2005", "2Q05", "2005Q1", "05Q1"] * 10000)

    def time_infer_quarter(self):
        """Parse the quarter strings with no explicit format."""
        to_datetime(self.s)
207
+
208
+
209
class ToDatetimeFormat:
    """Benchmark ``to_datetime`` with explicit ``format`` strings."""

    def setup(self):
        """Build the date-string Series and the offset-bearing string lists."""
        N = 100000
        self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N)
        # Strip the trailing ":HH:MM:SS" part so every value matches
        # "%d%b%y" exactly. regex=True is spelled out because the pattern
        # is a regular expression; where str.replace treats the pattern as
        # a literal by default, nothing would be removed.
        self.s2 = self.s.str.replace(":\\S+$", "", regex=True)

        self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
        # Ten different offsets (-00:00 .. -09:00), repeated to N entries.
        self.diff_offset = [
            f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
        ] * (N // 10)

    def time_exact(self):
        """Parse the stripped strings with ``%d%b%y``."""
        to_datetime(self.s2, format="%d%b%y")

    def time_no_exact(self):
        """Parse the unstripped strings with ``%d%b%y`` and ``exact=False``."""
        to_datetime(self.s, format="%d%b%y", exact=False)

    def time_same_offset(self):
        """Parse same-offset strings with an explicit ``%z`` format."""
        to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z")

    def time_different_offset(self):
        """Parse mixed-offset strings with an explicit ``%z`` format."""
        to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z")

    def time_same_offset_to_utc(self):
        """Parse same-offset strings with ``utc=True``."""
        to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)

    def time_different_offset_to_utc(self):
        """Parse mixed-offset strings with ``utc=True``."""
        to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)
237
+
238
+
239
class ToDatetimeCache:
    """Benchmark the ``cache`` option of ``to_datetime`` on several inputs."""

    # Each benchmark receives one of these values as ``cache``.
    params = [True, False]
    param_names = ["cache"]

    def setup(self, cache):
        """Build unique and duplicated inputs of 10000 entries each."""
        N = 10000
        self.unique_numeric_seconds = list(range(N))
        self.dup_numeric_seconds = [1000] * N
        self.dup_string_dates = ["2000-02-11"] * N
        self.dup_string_with_tz = ["2000-02-11 15:00:00-0800"] * N

    def time_unique_seconds_and_unit(self, cache):
        """Parse unique integers with ``unit="s"``."""
        to_datetime(self.unique_numeric_seconds, unit="s", cache=cache)

    def time_dup_seconds_and_unit(self, cache):
        """Parse one repeated integer with ``unit="s"``."""
        to_datetime(self.dup_numeric_seconds, unit="s", cache=cache)

    def time_dup_string_dates(self, cache):
        """Parse one repeated date string with no explicit format."""
        to_datetime(self.dup_string_dates, cache=cache)

    def time_dup_string_dates_and_format(self, cache):
        """Parse one repeated date string with ``format="%Y-%m-%d"``."""
        to_datetime(self.dup_string_dates, format="%Y-%m-%d", cache=cache)

    def time_dup_string_tzoffset_dates(self, cache):
        """Parse one repeated offset-bearing timestamp string."""
        to_datetime(self.dup_string_with_tz, cache=cache)
265
+
266
+
267
class ToTimedelta:
    """Benchmark ``to_timedelta`` on integers and two string spellings."""

    def setup(self):
        """Draw 10000 random integers in [0, 60) and render them as strings."""
        self.ints = np.random.randint(0, 60, size=10000)
        # Both string lists are derived element-by-element from self.ints.
        self.str_days = [f"{i} days" for i in self.ints]
        self.str_seconds = [f"00:00:{i:02d}" for i in self.ints]

    def time_convert_int(self):
        """Convert the integer array with ``unit="s"``."""
        to_timedelta(self.ints, unit="s")

    def time_convert_string_days(self):
        """Convert the ``"<n> days"`` strings."""
        to_timedelta(self.str_days)

    def time_convert_string_seconds(self):
        """Convert the ``"00:00:SS"`` strings."""
        to_timedelta(self.str_seconds)
284
+
285
+
286
class ToTimedeltaErrors:
    """Benchmark ``to_timedelta`` error handling on one unparseable entry."""

    # Each benchmark receives one of these values as ``errors``.
    # NOTE(review): errors="ignore" is believed to be deprecated in recent
    # pandas releases -- confirm before running against a newer pandas.
    params = ["coerce", "ignore"]
    param_names = ["errors"]

    def setup(self, errors):
        """Build 10000 ``"<n> days"`` strings and corrupt the last one."""
        ints = np.random.randint(0, 60, size=10000)
        self.arr = [f"{i} days" for i in ints]
        # A single invalid value at the end exercises the ``errors`` path.
        self.arr[-1] = "apple"

    def time_convert(self, errors):
        """Convert the list, forwarding ``errors`` to ``to_timedelta``."""
        to_timedelta(self.arr, errors=errors)
298
+
299
+
84
300
from .pandas_vb_common import setup # noqa: F401 isort:skip
0 commit comments