1
- from .pandas_vb_common import *
1
+ import numpy as np
2
+ import pandas .util .testing as tm
3
+ from pandas import date_range , DatetimeIndex , Index , MultiIndex , RangeIndex
4
+
5
+ from .pandas_vb_common import setup # noqa
2
6
3
7
4
8
class SetOperations (object ):
9
+
5
10
goal_time = 0.2
6
11
7
12
def setup (self ):
8
- self .rng = date_range ('1/1/2000' , periods = 10000 , freq = 'T' )
9
- self .rng2 = self .rng [:(- 1 )]
13
+ self .dates_left = date_range ('1/1/2000' , periods = 10000 , freq = 'T' )
14
+ self .dates_right = self .dates_left [:(- 1 )]
10
15
11
- # object index with datetime values
12
- if (self .rng .dtype == object ):
13
- self .idx_rng = self .rng .view (Index )
14
- else :
15
- self .idx_rng = self .rng .astype (object )
16
- self .idx_rng2 = self .idx_rng [:(- 1 )]
16
+ fmt = '%Y-%m-%d %H:%M:%S'
17
+ self .date_str_left = Index (self .dates_left .strftime (fmt ))
18
+ self .date_str_right = self .date_str_left [:- 1 ]
17
19
18
20
# other datetime
19
21
N = 100000
20
22
A = N - 20000
21
23
B = N + 20000
22
- self .dtidx1 = DatetimeIndex (range (N ))
23
- self .dtidx2 = DatetimeIndex (range (A , B ))
24
- self .dtidx3 = DatetimeIndex (range (N , B ))
25
-
26
- # integer
27
- self .N = 1000000
28
- self .options = np .arange (self .N )
29
- self .left = Index (
30
- self .options .take (np .random .permutation (self .N )[:(self .N // 2 )]))
31
- self .right = Index (
32
- self .options .take (np .random .permutation (self .N )[:(self .N // 2 )]))
33
-
34
- # strings
35
- N = 10000
36
- strs = tm .rands_array (10 , N )
37
- self .leftstr = Index (strs [:N * 2 // 3 ])
38
- self .rightstr = Index (strs [N // 3 :])
24
+ self .datetime_left = DatetimeIndex (range (N ))
25
+ self .datetime_right = DatetimeIndex (range (A , B ))
26
+ self .datetime_right2 = DatetimeIndex (range (N , B ))
27
+
28
+ options = np .arange (N )
29
+ self .int_left = Index (options .take (np .random .permutation (N )[:N // 2 ]))
30
+ self .int_right = Index (options .take (np .random .permutation (N )[:N // 2 ]))
31
+
32
+ strs = tm .rands_array (10 , N / 10 )
33
+ self .str_left = Index (strs [:N / 10 * 2 // 3 ])
34
+ self .str_right = Index (strs [N / 10 // 3 :])
39
35
40
36
def time_datetime_intersection (self ):
41
- self .rng .intersection (self .rng2 )
37
+ self .dates_left .intersection (self .dates_right )
42
38
43
39
def time_datetime_union (self ):
44
- self .rng .union (self .rng2 )
40
+ self .dates_left .union (self .dates_right )
45
41
46
42
def time_datetime_difference (self ):
47
- self .dtidx1 .difference (self .dtidx2 )
43
+ self .datetime_left .difference (self .datetime_right )
48
44
49
45
def time_datetime_difference_disjoint (self ):
50
- self .dtidx1 .difference (self .dtidx3 )
46
+ self .datetime_left .difference (self .datetime_right2 )
51
47
52
48
def time_datetime_symmetric_difference (self ):
53
- self .dtidx1 .symmetric_difference (self .dtidx2 )
49
+ self .datetime_left .symmetric_difference (self .datetime_right )
54
50
55
51
def time_index_datetime_intersection (self ):
56
- self .idx_rng .intersection (self .idx_rng2 )
52
+ self .date_str_left .intersection (self .date_str_right )
57
53
58
54
def time_index_datetime_union (self ):
59
- self .idx_rng .union (self .idx_rng2 )
55
+ self .date_str_left .union (self .date_str_right )
60
56
61
57
def time_int64_intersection (self ):
62
- self .left .intersection (self .right )
58
+ self .int_left .intersection (self .int_right )
63
59
64
60
def time_int64_union (self ):
65
- self .left .union (self .right )
61
+ self .int_left .union (self .int_right )
66
62
67
63
def time_int64_difference (self ):
68
- self .left .difference (self .right )
64
+ self .int_left .difference (self .int_right )
69
65
70
66
def time_int64_symmetric_difference (self ):
71
- self .left .symmetric_difference (self .right )
67
+ self .int_left .symmetric_difference (self .int_right )
72
68
73
69
def time_str_difference (self ):
74
- self .leftstr .difference (self .rightstr )
70
+ self .str_left .difference (self .str_right )
75
71
76
72
def time_str_symmetric_difference (self ):
77
- self .leftstr .symmetric_difference (self .rightstr )
73
+ self .str_left .symmetric_difference (self .str_right )
78
74
79
75
80
76
class Datetime(object):
    """Benchmark reading ``_is_dates_only`` from a daily date range."""

    goal_time = 0.2

    def setup(self):
        """Create the 10000-period daily range read by the benchmark."""
        daily = date_range('20000101', periods=10000, freq='D')
        self.dr = daily

    def time_is_dates_only(self):
        """Access ``_is_dates_only`` on the prepared range; the value is discarded."""
        self.dr._is_dates_only
88
85
89
86
90
- class Float64 (object ):
91
- goal_time = 0.2
87
+ class Ops (object ):
92
88
93
- def setup (self ):
94
- self .idx = tm .makeFloatIndex (1000000 )
95
- self .mask = ((np .arange (self .idx .size ) % 3 ) == 0 )
96
- self .series_mask = Series (self .mask )
89
+ sample_time = 0.2
90
+ params = ['float' , 'int' ]
91
+ param_names = ['dtype' ]
97
92
98
- self .baseidx = np .arange (1000000.0 )
93
+ def setup (self , dtype ):
94
+ N = 10 ** 6
95
+ indexes = {'int' : 'makeIntIndex' , 'float' : 'makeFloatIndex' }
96
+ self .index = getattr (tm , indexes [dtype ])(N )
99
97
100
- def time_boolean_indexer (self ):
101
- self .idx [ self . mask ]
98
+ def time_add (self , dtype ):
99
+ self .index + 2
102
100
103
- def time_boolean_series_indexer (self ):
104
- self .idx [ self . series_mask ]
101
+ def time_subtract (self , dtype ):
102
+ self .index - 2
105
103
106
- def time_construct (self ):
107
- Index ( self .baseidx )
104
+ def time_multiply (self , dtype ):
105
+ self .index * 2
108
106
109
- def time_div (self ):
110
- ( self .idx / 2 )
107
+ def time_divide (self , dtype ):
108
+ self .index / 2
111
109
112
- def time_get (self ):
113
- self .idx [ 1 ]
110
+ def time_modulo (self , dtype ):
111
+ self .index % 2
114
112
115
- def time_mul (self ):
116
- (self .idx * 2 )
117
113
118
- def time_slice_indexer_basic (self ):
119
- self .idx [:(- 1 )]
114
+ class Duplicated (object ):
120
115
121
- def time_slice_indexer_even (self ):
122
- self .idx [::2 ]
123
-
124
-
125
- class StringIndex (object ):
126
116
goal_time = 0.2
127
117
128
118
def setup (self ):
129
- self .idx = tm .makeStringIndex (1000000 )
130
- self .mask = ((np .arange (1000000 ) % 3 ) == 0 )
131
- self .series_mask = Series (self .mask )
132
-
133
- def time_boolean_indexer (self ):
134
- self .idx [self .mask ]
135
-
136
- def time_boolean_series_indexer (self ):
137
- self .idx [self .series_mask ]
138
-
139
- def time_slice_indexer_basic (self ):
140
- self .idx [:(- 1 )]
141
-
142
- def time_slice_indexer_even (self ):
143
- self .idx [::2 ]
144
-
145
-
146
- class Multi1 (object ):
147
- goal_time = 0.2
148
-
149
- def setup (self ):
150
- (n , k ) = (200 , 5000 )
151
- self .levels = [np .arange (n ), tm .makeStringIndex (n ).values , (1000 + np .arange (n ))]
152
- self .labels = [np .random .choice (n , (k * n )) for lev in self .levels ]
153
- self .mi = MultiIndex (levels = self .levels , labels = self .labels )
154
-
155
- self .iterables = [tm .makeStringIndex (10000 ), range (20 )]
119
+ n , k = 200 , 5000
120
+ levels = [np .arange (n ),
121
+ tm .makeStringIndex (n ).values ,
122
+ 1000 + np .arange (n )]
123
+ labels = [np .random .choice (n , (k * n )) for lev in levels ]
124
+ self .mi = MultiIndex (levels = levels , labels = labels )
156
125
157
126
def time_duplicated (self ):
158
127
self .mi .duplicated ()
159
128
160
- def time_from_product (self ):
161
- MultiIndex .from_product (self .iterables )
162
129
130
class Sortlevel(object):
    """Benchmarks for ``MultiIndex.sortlevel``.

    ``mi_int`` is a shuffled five-level index of random integers; ``mi`` is a
    shuffled two-level index built from a 100 x 1000 grid of integers.
    """

    goal_time = 0.2

    def setup(self):
        """Build the two shuffled MultiIndex objects that get sorted."""
        # Every array below has length (n // k) * k, so n must be divisible by
        # each repeat factor or the arrays end up with different lengths and
        # cannot be combined into one MultiIndex.
        n = 3 * 5 * 7 * 11 * (1 << 10)
        low, high = -5000, 5000
        arrs = [np.repeat(np.random.randint(low, high, n // k), k)
                for k in [11, 7, 5, 3, 1]]
        self.mi_int = MultiIndex.from_arrays(arrs)[np.random.permutation(n)]

        a = np.repeat(np.arange(100), 1000)
        b = np.tile(np.arange(1000), 100)
        self.mi = MultiIndex.from_arrays([a, b])
        # Shuffle with integer positions sized from the index itself rather
        # than from ``n``, which describes the other index.
        self.mi = self.mi.take(np.random.permutation(len(self.mi)))

    def time_sortlevel_int64(self):
        """Sort the five-level integer index with default arguments."""
        self.mi_int.sortlevel()

    def time_sortlevel_zero(self):
        """Sort the two-level index by level 0."""
        self.mi.sortlevel(0)

    def time_sortlevel_one(self):
        """Sort the two-level index by level 1."""
        self.mi.sortlevel(1)
154
+
187
155
156
class MultiIndexValues(object):
    """Benchmarks for ``.values`` on a MultiIndex with a datetime level."""

    goal_time = 0.2

    def setup_cache(self):
        """Return the product of 1000 integers and 100 consecutive dates.

        NOTE(review): presumably the benchmark runner hands this return value
        to each ``time_*`` method as ``mi`` -- confirm against the runner.
        """
        ints = range(1000)
        dates = date_range(start='1/1/2012', periods=100)
        return MultiIndex.from_product([ints, dates])

    def time_datetime_level_values_copy(self, mi):
        """Materialize ``.values`` from a fresh copy of ``mi``."""
        mi.copy().values

    def time_datetime_level_values_sliced(self, mi):
        """Materialize ``.values`` from the first ten entries of ``mi``."""
        mi[:10].values
202
172
203
173
204
174
class Range (object ):
175
+
205
176
goal_time = 0.2
206
177
207
178
def setup (self ):
@@ -221,20 +192,32 @@ def time_min_trivial(self):
221
192
self .idx_inc .min ()
222
193
223
194
224
- class IndexOps (object ):
195
class IndexAppend(object):
    """Benchmarks for appending a long list of indexes to a base index.

    The base covers 0..99 and the list holds the following consecutive
    100-wide blocks, in three variants: ``RangeIndex``, the result of
    ``astype(int)`` on it, and the result of ``astype(str)`` on that.
    """

    goal_time = 0.2

    def setup(self):
        """Build the base index and the list to append for each variant."""
        N = 10000

        # Base indexes that ``append`` is called on.
        self.range_idx = RangeIndex(0, 100)
        self.int_idx = self.range_idx.astype(int)
        self.obj_idx = self.int_idx.astype(str)

        # Blocks 1 .. N-1; each variant list is derived from the previous one.
        self.range_idxs = [RangeIndex(i * 100, (i + 1) * 100)
                           for i in range(1, N)]
        self.int_idxs = [block.astype(int) for block in self.range_idxs]
        self.object_idxs = [block.astype(str) for block in self.int_idxs]

    def time_append_range_list(self):
        """Append the list of RangeIndex blocks to the base RangeIndex."""
        self.range_idx.append(self.range_idxs)

    def time_append_int_list(self):
        """Append the list of integer blocks to the integer base index."""
        self.int_idx.append(self.int_idxs)

    def time_append_obj_list(self):
        """Append the list of string blocks to the string base index."""
        self.obj_idx.append(self.object_idxs)
0 commit comments