Skip to content

Commit 3d6e307

Browse files
committed
CLN: ASV index object benchmark
1 parent 9705a48 commit 3d6e307

File tree

3 files changed

+169 -132 lines changed

asv_bench/benchmarks/ctors.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import numpy as np
2-
from pandas import DataFrame, Series, Index, DatetimeIndex, Timestamp
2+
import pandas.util.testing as tm
3+
from pandas import (DataFrame, Series, Index, DatetimeIndex, Timestamp,
4+
MultiIndex)
35

46
from .pandas_vb_common import setup # noqa
57

@@ -9,7 +11,7 @@ class Constructors(object):
911
goal_time = 0.2
1012

1113
def setup(self):
12-
N = 10**2
14+
N = 10**4
1315
self.arr = np.random.randn(N, N)
1416
self.arr_str = np.array(['foo', 'bar', 'baz'], dtype=object)
1517

@@ -19,6 +21,8 @@ def setup(self):
1921
self.s = Series([Timestamp('20110101'), Timestamp('20120101'),
2022
Timestamp('20130101')] * N * 10)
2123

24+
self.iterables = [tm.makeStringIndex(N), range(20)]
25+
2226
def time_frame_from_ndarray(self):
2327
DataFrame(self.arr)
2428

@@ -28,8 +32,14 @@ def time_series_from_ndarray(self):
2832
def time_index_from_array_string(self):
2933
Index(self.arr_str)
3034

35+
def time_index_from_array_floats(self):
36+
Index(self.arr)
37+
3138
def time_dtindex_from_series(self):
3239
DatetimeIndex(self.s)
3340

3441
def time_dtindex_from_index_with_series(self):
3542
Index(self.s)
43+
44+
def time_multiindex_from_iterables(self):
45+
MultiIndex.from_product(self.iterables)

asv_bench/benchmarks/index_object.py

Lines changed: 113 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1,207 +1,178 @@
1-
from .pandas_vb_common import *
1+
import numpy as np
2+
import pandas.util.testing as tm
3+
from pandas import date_range, DatetimeIndex, Index, MultiIndex, RangeIndex
4+
5+
from .pandas_vb_common import setup # noqa
26

37

48
class SetOperations(object):
9+
510
goal_time = 0.2
611

712
def setup(self):
8-
self.rng = date_range('1/1/2000', periods=10000, freq='T')
9-
self.rng2 = self.rng[:(-1)]
13+
self.dates_left = date_range('1/1/2000', periods=10000, freq='T')
14+
self.dates_right = self.dates_left[:(-1)]
1015

11-
# object index with datetime values
12-
if (self.rng.dtype == object):
13-
self.idx_rng = self.rng.view(Index)
14-
else:
15-
self.idx_rng = self.rng.astype(object)
16-
self.idx_rng2 = self.idx_rng[:(-1)]
16+
fmt = '%Y-%m-%d %H:%M:%S'
17+
self.date_str_left = Index(self.dates_left.strftime(fmt))
18+
self.date_str_right = self.date_str_left[:-1]
1719

1820
# other datetime
1921
N = 100000
2022
A = N - 20000
2123
B = N + 20000
22-
self.dtidx1 = DatetimeIndex(range(N))
23-
self.dtidx2 = DatetimeIndex(range(A, B))
24-
self.dtidx3 = DatetimeIndex(range(N, B))
25-
26-
# integer
27-
self.N = 1000000
28-
self.options = np.arange(self.N)
29-
self.left = Index(
30-
self.options.take(np.random.permutation(self.N)[:(self.N // 2)]))
31-
self.right = Index(
32-
self.options.take(np.random.permutation(self.N)[:(self.N // 2)]))
33-
34-
# strings
35-
N = 10000
36-
strs = tm.rands_array(10, N)
37-
self.leftstr = Index(strs[:N * 2 // 3])
38-
self.rightstr = Index(strs[N // 3:])
24+
self.datetime_left = DatetimeIndex(range(N))
25+
self.datetime_right = DatetimeIndex(range(A, B))
26+
self.datetime_right2 = DatetimeIndex(range(N, B))
27+
28+
options = np.arange(N)
29+
self.int_left = Index(options.take(np.random.permutation(N)[:N // 2]))
30+
self.int_right = Index(options.take(np.random.permutation(N)[:N // 2]))
31+
32+
strs = tm.rands_array(10, N / 10)
33+
self.str_left = Index(strs[:N / 10 * 2 // 3])
34+
self.str_right = Index(strs[N / 10 // 3:])
3935

4036
def time_datetime_intersection(self):
41-
self.rng.intersection(self.rng2)
37+
self.dates_left.intersection(self.dates_right)
4238

4339
def time_datetime_union(self):
44-
self.rng.union(self.rng2)
40+
self.dates_left.union(self.dates_right)
4541

4642
def time_datetime_difference(self):
47-
self.dtidx1.difference(self.dtidx2)
43+
self.datetime_left.difference(self.datetime_right)
4844

4945
def time_datetime_difference_disjoint(self):
50-
self.dtidx1.difference(self.dtidx3)
46+
self.datetime_left.difference(self.datetime_right2)
5147

5248
def time_datetime_symmetric_difference(self):
53-
self.dtidx1.symmetric_difference(self.dtidx2)
49+
self.datetime_left.symmetric_difference(self.datetime_right)
5450

5551
def time_index_datetime_intersection(self):
56-
self.idx_rng.intersection(self.idx_rng2)
52+
self.date_str_left.intersection(self.date_str_right)
5753

5854
def time_index_datetime_union(self):
59-
self.idx_rng.union(self.idx_rng2)
55+
self.date_str_left.union(self.date_str_right)
6056

6157
def time_int64_intersection(self):
62-
self.left.intersection(self.right)
58+
self.int_left.intersection(self.int_right)
6359

6460
def time_int64_union(self):
65-
self.left.union(self.right)
61+
self.int_left.union(self.int_right)
6662

6763
def time_int64_difference(self):
68-
self.left.difference(self.right)
64+
self.int_left.difference(self.int_right)
6965

7066
def time_int64_symmetric_difference(self):
71-
self.left.symmetric_difference(self.right)
67+
self.int_left.symmetric_difference(self.int_right)
7268

7369
def time_str_difference(self):
74-
self.leftstr.difference(self.rightstr)
70+
self.str_left.difference(self.str_right)
7571

7672
def time_str_symmetric_difference(self):
77-
self.leftstr.symmetric_difference(self.rightstr)
73+
self.str_left.symmetric_difference(self.str_right)
7874

7975

8076
class Datetime(object):
77+
8178
goal_time = 0.2
8279

8380
def setup(self):
84-
self.dr = pd.date_range('20000101', freq='D', periods=10000)
81+
self.dr = date_range('20000101', freq='D', periods=10000)
8582

8683
def time_is_dates_only(self):
8784
self.dr._is_dates_only
8885

8986

90-
class Float64(object):
91-
goal_time = 0.2
87+
class Ops(object):
9288

93-
def setup(self):
94-
self.idx = tm.makeFloatIndex(1000000)
95-
self.mask = ((np.arange(self.idx.size) % 3) == 0)
96-
self.series_mask = Series(self.mask)
89+
sample_time = 0.2
90+
params = ['float', 'int']
91+
param_names = ['dtype']
9792

98-
self.baseidx = np.arange(1000000.0)
93+
def setup(self, dtype):
94+
N = 10**6
95+
indexes = {'int': 'makeIntIndex', 'float': 'makeFloatIndex'}
96+
self.index = getattr(tm, indexes[dtype])(N)
9997

100-
def time_boolean_indexer(self):
101-
self.idx[self.mask]
98+
def time_add(self, dtype):
99+
self.index + 2
102100

103-
def time_boolean_series_indexer(self):
104-
self.idx[self.series_mask]
101+
def time_subtract(self, dtype):
102+
self.index - 2
105103

106-
def time_construct(self):
107-
Index(self.baseidx)
104+
def time_multiply(self, dtype):
105+
self.index * 2
108106

109-
def time_div(self):
110-
(self.idx / 2)
107+
def time_divide(self, dtype):
108+
self.index / 2
111109

112-
def time_get(self):
113-
self.idx[1]
110+
def time_modulo(self, dtype):
111+
self.index % 2
114112

115-
def time_mul(self):
116-
(self.idx * 2)
117113

118-
def time_slice_indexer_basic(self):
119-
self.idx[:(-1)]
114+
class Duplicated(object):
120115

121-
def time_slice_indexer_even(self):
122-
self.idx[::2]
123-
124-
125-
class StringIndex(object):
126116
goal_time = 0.2
127117

128118
def setup(self):
129-
self.idx = tm.makeStringIndex(1000000)
130-
self.mask = ((np.arange(1000000) % 3) == 0)
131-
self.series_mask = Series(self.mask)
132-
133-
def time_boolean_indexer(self):
134-
self.idx[self.mask]
135-
136-
def time_boolean_series_indexer(self):
137-
self.idx[self.series_mask]
138-
139-
def time_slice_indexer_basic(self):
140-
self.idx[:(-1)]
141-
142-
def time_slice_indexer_even(self):
143-
self.idx[::2]
144-
145-
146-
class Multi1(object):
147-
goal_time = 0.2
148-
149-
def setup(self):
150-
(n, k) = (200, 5000)
151-
self.levels = [np.arange(n), tm.makeStringIndex(n).values, (1000 + np.arange(n))]
152-
self.labels = [np.random.choice(n, (k * n)) for lev in self.levels]
153-
self.mi = MultiIndex(levels=self.levels, labels=self.labels)
154-
155-
self.iterables = [tm.makeStringIndex(10000), range(20)]
119+
n, k = 200, 5000
120+
levels = [np.arange(n),
121+
tm.makeStringIndex(n).values,
122+
1000 + np.arange(n)]
123+
labels = [np.random.choice(n, (k * n)) for lev in levels]
124+
self.mi = MultiIndex(levels=levels, labels=labels)
156125

157126
def time_duplicated(self):
158127
self.mi.duplicated()
159128

160-
def time_from_product(self):
161-
MultiIndex.from_product(self.iterables)
162129

130+
class Sortlevel(object):
163131

164-
class Multi2(object):
165132
goal_time = 0.2
166133

167134
def setup(self):
168-
self.n = ((((3 * 5) * 7) * 11) * (1 << 10))
169-
(low, high) = (((-1) << 12), (1 << 12))
170-
self.f = (lambda k: np.repeat(np.random.randint(low, high, (self.n // k)), k))
171-
self.i = np.random.permutation(self.n)
172-
self.mi = MultiIndex.from_arrays([self.f(11), self.f(7), self.f(5), self.f(3), self.f(1)])[self.i]
135+
n = 10**6
136+
low, high = -5000, 5000
137+
arrs = [np.repeat(np.random.randint(low, high, (n // k)), k)
138+
for k in [11, 7, 5, 3, 1]]
139+
self.mi_int = MultiIndex.from_arrays(arrs)[np.random.permutation(n)]
173140

174-
self.a = np.repeat(np.arange(100), 1000)
175-
self.b = np.tile(np.arange(1000), 100)
176-
self.midx2 = MultiIndex.from_arrays([self.a, self.b])
177-
self.midx2 = self.midx2.take(np.random.permutation(np.arange(100000)))
141+
a = np.repeat(np.arange(100), 1000)
142+
b = np.tile(np.arange(1000), 100)
143+
self.mi = MultiIndex.from_arrays([a, b])
144+
self.mi = self.mi.take(np.random.permutation(np.arange(n / 10)))
178145

179146
def time_sortlevel_int64(self):
180-
self.mi.sortlevel()
147+
self.mi_int.sortlevel()
181148

182149
def time_sortlevel_zero(self):
183-
self.midx2.sortlevel(0)
150+
self.mi.sortlevel(0)
184151

185152
def time_sortlevel_one(self):
186-
self.midx2.sortlevel(1)
153+
self.mi.sortlevel(1)
154+
187155

156+
class MultiIndexValues(object):
188157

189-
class Multi3(object):
190158
goal_time = 0.2
191159

192-
def setup(self):
193-
self.level1 = range(1000)
194-
self.level2 = date_range(start='1/1/2012', periods=100)
195-
self.mi = MultiIndex.from_product([self.level1, self.level2])
160+
def setup_cache(self):
196161

197-
def time_datetime_level_values_full(self):
198-
self.mi.copy().values
162+
level1 = range(1000)
163+
level2 = date_range(start='1/1/2012', periods=100)
164+
mi = MultiIndex.from_product([level1, level2])
165+
return mi
199166

200-
def time_datetime_level_values_sliced(self):
201-
self.mi[:10].values
167+
def time_datetime_level_values_copy(self, mi):
168+
mi.copy().values
169+
170+
def time_datetime_level_values_sliced(self, mi):
171+
mi[:10].values
202172

203173

204174
class Range(object):
175+
205176
goal_time = 0.2
206177

207178
def setup(self):
@@ -221,20 +192,32 @@ def time_min_trivial(self):
221192
self.idx_inc.min()
222193

223194

224-
class IndexOps(object):
195+
class IndexAppend(object):
196+
225197
goal_time = 0.2
226198

227199
def setup(self):
228-
N = 10000
229-
self.ridx = [RangeIndex(i * 100, (i + 1) * 100) for i in range(N)]
230-
self.iidx = [idx.astype(int) for idx in self.ridx]
231-
self.oidx = [idx.astype(str) for idx in self.iidx]
232200

233-
def time_concat_range(self):
234-
self.ridx[0].append(self.ridx[1:])
235-
236-
def time_concat_int(self):
237-
self.iidx[0].append(self.iidx[1:])
238-
239-
def time_concat_obj(self):
240-
self.oidx[0].append(self.oidx[1:])
201+
N = 10000
202+
self.range_idx = RangeIndex(0, 100)
203+
self.int_idx = self.range_idx.astype(int)
204+
self.obj_idx = self.int_idx.astype(str)
205+
self.range_idxs = []
206+
self.int_idxs = []
207+
self.object_idxs = []
208+
for i in range(1, N):
209+
r_idx = RangeIndex(i * 100, (i + 1) * 100)
210+
self.range_idxs.append(r_idx)
211+
i_idx = r_idx.astype(int)
212+
self.int_idxs.append(i_idx)
213+
o_idx = i_idx.astype(str)
214+
self.object_idxs.append(o_idx)
215+
216+
def time_append_range_list(self):
217+
self.range_idx.append(self.range_idxs)
218+
219+
def time_append_int_list(self):
220+
self.int_idx.append(self.int_idxs)
221+
222+
def time_append_obj_list(self):
223+
self.obj_idx.append(self.object_idxs)

0 commit comments

Comments (0)