Skip to content

Commit f50ebf4

Browse files
committed
post merge with master
2 parents b0b4d8e + be66ef8 commit f50ebf4

38 files changed

+689
-286
lines changed

asv_bench/benchmarks/binary_ops.py

Lines changed: 44 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,20 @@
1-
from .pandas_vb_common import *
1+
import numpy as np
2+
from pandas import DataFrame, Series, date_range
23
try:
34
import pandas.core.computation.expressions as expr
45
except ImportError:
56
import pandas.computation.expressions as expr
67

78

89
class Ops(object):
10+
911
goal_time = 0.2
1012

1113
params = [[True, False], ['default', 1]]
1214
param_names = ['use_numexpr', 'threads']
1315

1416
def setup(self, use_numexpr, threads):
17+
np.random.seed(1234)
1518
self.df = DataFrame(np.random.randn(20000, 100))
1619
self.df2 = DataFrame(np.random.randn(20000, 100))
1720

@@ -20,94 +23,88 @@ def setup(self, use_numexpr, threads):
2023
if not use_numexpr:
2124
expr.set_use_numexpr(False)
2225

23-
2426
def time_frame_add(self, use_numexpr, threads):
25-
(self.df + self.df2)
27+
self.df + self.df2
2628

2729
def time_frame_mult(self, use_numexpr, threads):
28-
(self.df * self.df2)
30+
self.df * self.df2
2931

3032
def time_frame_multi_and(self, use_numexpr, threads):
31-
self.df[((self.df > 0) & (self.df2 > 0))]
33+
self.df[(self.df > 0) & (self.df2 > 0)]
3234

3335
def time_frame_comparison(self, use_numexpr, threads):
34-
(self.df > self.df2)
36+
self.df > self.df2
3537

3638
def teardown(self, use_numexpr, threads):
3739
expr.set_use_numexpr(True)
3840
expr.set_numexpr_threads()
3941

4042

4143
class Ops2(object):
44+
4245
goal_time = 0.2
4346

4447
def setup(self):
45-
self.df = DataFrame(np.random.randn(1000, 1000))
46-
self.df2 = DataFrame(np.random.randn(1000, 1000))
48+
N = 10**3
49+
np.random.seed(1234)
50+
self.df = DataFrame(np.random.randn(N, N))
51+
self.df2 = DataFrame(np.random.randn(N, N))
4752

48-
self.df_int = DataFrame(
49-
np.random.random_integers(np.iinfo(np.int16).min,
50-
np.iinfo(np.int16).max,
51-
size=(1000, 1000)))
52-
self.df2_int = DataFrame(
53-
np.random.random_integers(np.iinfo(np.int16).min,
54-
np.iinfo(np.int16).max,
55-
size=(1000, 1000)))
53+
self.df_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
54+
np.iinfo(np.int16).max,
55+
size=(N, N)))
56+
self.df2_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
57+
np.iinfo(np.int16).max,
58+
size=(N, N)))
5659

57-
## Division
60+
# Division
5861

5962
def time_frame_float_div(self):
60-
(self.df // self.df2)
63+
self.df // self.df2
6164

6265
def time_frame_float_div_by_zero(self):
63-
(self.df / 0)
66+
self.df / 0
6467

6568
def time_frame_float_floor_by_zero(self):
66-
(self.df // 0)
69+
self.df // 0
6770

6871
def time_frame_int_div_by_zero(self):
69-
(self.df_int / 0)
72+
self.df_int / 0
7073

71-
## Modulo
74+
# Modulo
7275

7376
def time_frame_int_mod(self):
74-
(self.df / self.df2)
77+
self.df_int % self.df2_int
7578

7679
def time_frame_float_mod(self):
77-
(self.df / self.df2)
80+
self.df % self.df2
7881

7982

8083
class Timeseries(object):
84+
8185
goal_time = 0.2
8286

83-
def setup(self):
84-
self.N = 1000000
87+
params = [None, 'US/Eastern']
88+
param_names = ['tz']
89+
90+
def setup(self, tz):
91+
self.N = 10**6
8592
self.halfway = ((self.N // 2) - 1)
86-
self.s = Series(date_range('20010101', periods=self.N, freq='T'))
93+
self.s = Series(date_range('20010101', periods=self.N, freq='T',
94+
tz=tz))
8795
self.ts = self.s[self.halfway]
8896

89-
self.s2 = Series(date_range('20010101', periods=self.N, freq='s'))
97+
self.s2 = Series(date_range('20010101', periods=self.N, freq='s',
98+
tz=tz))
9099

91-
def time_series_timestamp_compare(self):
92-
(self.s <= self.ts)
100+
def time_series_timestamp_compare(self, tz):
101+
self.s <= self.ts
93102

94-
def time_timestamp_series_compare(self):
95-
(self.ts >= self.s)
103+
def time_timestamp_series_compare(self, tz):
104+
self.ts >= self.s
96105

97-
def time_timestamp_ops_diff1(self):
106+
def time_timestamp_ops_diff(self, tz):
98107
self.s2.diff()
99108

100-
def time_timestamp_ops_diff2(self):
101-
(self.s - self.s.shift())
102-
103-
104-
105-
class TimeseriesTZ(Timeseries):
106-
107-
def setup(self):
108-
self.N = 1000000
109-
self.halfway = ((self.N // 2) - 1)
110-
self.s = Series(date_range('20010101', periods=self.N, freq='T', tz='US/Eastern'))
111-
self.ts = self.s[self.halfway]
112-
113-
self.s2 = Series(date_range('20010101', periods=self.N, freq='s', tz='US/Eastern'))
109+
def time_timestamp_ops_diff_with_shift(self, tz):
110+
self.s - self.s.shift()

asv_bench/benchmarks/categoricals.py

Lines changed: 82 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
1-
from .pandas_vb_common import *
1+
import numpy as np
2+
import pandas as pd
3+
import pandas.util.testing as tm
24
try:
35
from pandas.api.types import union_categoricals
46
except ImportError:
@@ -8,107 +10,136 @@
810
pass
911

1012

11-
class Categoricals(object):
13+
class Concat(object):
14+
1215
goal_time = 0.2
1316

1417
def setup(self):
15-
N = 100000
16-
self.s = pd.Series((list('aabbcd') * N)).astype('category')
18+
N = 10**5
19+
self.s = pd.Series(list('aabbcd') * N).astype('category')
20+
21+
self.a = pd.Categorical(list('aabbcd') * N)
22+
self.b = pd.Categorical(list('bbcdjk') * N)
23+
24+
def time_concat(self):
25+
pd.concat([self.s, self.s])
26+
27+
def time_union(self):
28+
union_categoricals([self.a, self.b])
29+
1730

18-
self.a = pd.Categorical((list('aabbcd') * N))
19-
self.b = pd.Categorical((list('bbcdjk') * N))
31+
class Constructor(object):
2032

33+
goal_time = 0.2
34+
35+
def setup(self):
36+
N = 10**5
2137
self.categories = list('abcde')
22-
self.cat_idx = Index(self.categories)
38+
self.cat_idx = pd.Index(self.categories)
2339
self.values = np.tile(self.categories, N)
2440
self.codes = np.tile(range(len(self.categories)), N)
2541

26-
self.datetimes = pd.Series(pd.date_range(
27-
'1995-01-01 00:00:00', periods=10000, freq='s'))
42+
self.datetimes = pd.Series(pd.date_range('1995-01-01 00:00:00',
43+
periods=N / 10,
44+
freq='s'))
45+
self.datetimes_with_nat = self.datetimes.copy()
46+
self.datetimes_with_nat.iloc[-1] = pd.NaT
2847

2948
self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
3049
self.values_all_nan = [np.nan] * len(self.values)
3150

32-
def time_concat(self):
33-
concat([self.s, self.s])
34-
35-
def time_union(self):
36-
union_categoricals([self.a, self.b])
51+
def time_regular(self):
52+
pd.Categorical(self.values, self.categories)
3753

38-
def time_constructor_regular(self):
39-
Categorical(self.values, self.categories)
54+
def time_fastpath(self):
55+
pd.Categorical(self.codes, self.cat_idx, fastpath=True)
4056

41-
def time_constructor_fastpath(self):
42-
Categorical(self.codes, self.cat_idx, fastpath=True)
57+
def time_datetimes(self):
58+
pd.Categorical(self.datetimes)
4359

44-
def time_constructor_datetimes(self):
45-
Categorical(self.datetimes)
60+
def time_datetimes_with_nat(self):
61+
pd.Categorical(self.datetimes_with_nat)
4662

47-
def time_constructor_datetimes_with_nat(self):
48-
t = self.datetimes
49-
t.iloc[-1] = pd.NaT
50-
Categorical(t)
63+
def time_with_nan(self):
64+
pd.Categorical(self.values_some_nan)
5165

52-
def time_constructor_with_nan(self):
53-
Categorical(self.values_some_nan)
66+
def time_all_nan(self):
67+
pd.Categorical(self.values_all_nan)
5468

55-
def time_constructor_all_nan(self):
56-
Categorical(self.values_all_nan)
5769

70+
class ValueCounts(object):
5871

59-
class Categoricals2(object):
6072
goal_time = 0.2
6173

62-
def setup(self):
63-
n = 500000
74+
params = [True, False]
75+
param_names = ['dropna']
76+
77+
def setup(self, dropna):
78+
n = 5 * 10**5
6479
np.random.seed(2718281)
6580
arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
66-
self.ts = Series(arr).astype('category')
81+
self.ts = pd.Series(arr).astype('category')
82+
83+
def time_value_counts(self, dropna):
84+
self.ts.value_counts(dropna=dropna)
85+
6786

68-
self.sel = self.ts.loc[[0]]
87+
class Repr(object):
6988

70-
def time_value_counts(self):
71-
self.ts.value_counts(dropna=False)
89+
goal_time = 0.2
7290

73-
def time_value_counts_dropna(self):
74-
self.ts.value_counts(dropna=True)
91+
def setup(self):
92+
self.sel = pd.Series(['s1234']).astype('category')
7593

7694
def time_rendering(self):
7795
str(self.sel)
7896

97+
98+
class SetCategories(object):
99+
100+
goal_time = 0.2
101+
102+
def setup(self):
103+
n = 5 * 10**5
104+
np.random.seed(2718281)
105+
arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
106+
self.ts = pd.Series(arr).astype('category')
107+
79108
def time_set_categories(self):
80109
self.ts.cat.set_categories(self.ts.cat.categories[::2])
81110

82111

83-
class Categoricals3(object):
112+
class Rank(object):
113+
84114
goal_time = 0.2
85115

86116
def setup(self):
87-
N = 100000
117+
N = 10**5
88118
ncats = 100
119+
np.random.seed(1234)
89120

90-
self.s1 = Series(np.array(tm.makeCategoricalIndex(N, ncats)))
91-
self.s1_cat = self.s1.astype('category')
92-
self.s1_cat_ordered = self.s1.astype('category', ordered=True)
121+
self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
122+
self.s_str_cat = self.s_str.astype('category')
123+
self.s_str_cat_ordered = self.s_str.astype('category', ordered=True)
93124

94-
self.s2 = Series(np.random.randint(0, ncats, size=N))
95-
self.s2_cat = self.s2.astype('category')
96-
self.s2_cat_ordered = self.s2.astype('category', ordered=True)
125+
self.s_int = pd.Series(np.random.randint(0, ncats, size=N))
126+
self.s_int_cat = self.s_int.astype('category')
127+
self.s_int_cat_ordered = self.s_int.astype('category', ordered=True)
97128

98129
def time_rank_string(self):
99-
self.s1.rank()
130+
self.s_str.rank()
100131

101132
def time_rank_string_cat(self):
102-
self.s1_cat.rank()
133+
self.s_str_cat.rank()
103134

104135
def time_rank_string_cat_ordered(self):
105-
self.s1_cat_ordered.rank()
136+
self.s_str_cat_ordered.rank()
106137

107138
def time_rank_int(self):
108-
self.s2.rank()
139+
self.s_int.rank()
109140

110141
def time_rank_int_cat(self):
111-
self.s2_cat.rank()
142+
self.s_int_cat.rank()
112143

113144
def time_rank_int_cat_ordered(self):
114-
self.s2_cat_ordered.rank()
145+
self.s_int_cat_ordered.rank()

asv_bench/benchmarks/ctors.py

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,34 @@
1-
from .pandas_vb_common import *
1+
import numpy as np
2+
from pandas import DataFrame, Series, Index, DatetimeIndex, Timestamp
23

34

45
class Constructors(object):
6+
57
goal_time = 0.2
68

79
def setup(self):
8-
self.arr = np.random.randn(100, 100)
10+
N = 10**2
11+
np.random.seed(1234)
12+
self.arr = np.random.randn(N, N)
913
self.arr_str = np.array(['foo', 'bar', 'baz'], dtype=object)
1014

11-
self.data = np.random.randn(100)
12-
self.index = Index(np.arange(100))
15+
self.data = np.random.randn(N)
16+
self.index = Index(np.arange(N))
1317

14-
self.s = Series(([Timestamp('20110101'), Timestamp('20120101'),
15-
Timestamp('20130101')] * 1000))
18+
self.s = Series([Timestamp('20110101'), Timestamp('20120101'),
19+
Timestamp('20130101')] * N * 10)
1620

1721
def time_frame_from_ndarray(self):
1822
DataFrame(self.arr)
1923

2024
def time_series_from_ndarray(self):
21-
pd.Series(self.data, index=self.index)
25+
Series(self.data, index=self.index)
2226

2327
def time_index_from_array_string(self):
2428
Index(self.arr_str)
2529

2630
def time_dtindex_from_series(self):
2731
DatetimeIndex(self.s)
2832

29-
def time_dtindex_from_series2(self):
33+
def time_dtindex_from_index_with_series(self):
3034
Index(self.s)

0 commit comments

Comments
 (0)