-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Update asv config + fix some broken benchmarks #12563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e083c01
448b36a
65db647
8cba84d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,192 +3,36 @@ | |
import pandas.computation.expressions as expr | ||
|
||
|
||
class eval_frame_add_all_threads(object): | ||
class eval_frame(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
|
||
def time_eval_frame_add_all_threads(self): | ||
pd.eval('df + df2 + df3 + df4') | ||
|
||
|
||
class eval_frame_add_one_thread(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
expr.set_numexpr_threads(1) | ||
|
||
def time_eval_frame_add_one_thread(self): | ||
pd.eval('df + df2 + df3 + df4') | ||
|
||
|
||
class eval_frame_add_python(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
|
||
def time_eval_frame_add_python(self): | ||
pd.eval('df + df2 + df3 + df4', engine='python') | ||
|
||
|
||
class eval_frame_add_python_one_thread(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
expr.set_numexpr_threads(1) | ||
|
||
def time_eval_frame_add_python_one_thread(self): | ||
pd.eval('df + df2 + df3 + df4', engine='python') | ||
|
||
|
||
class eval_frame_and_all_threads(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
|
||
def time_eval_frame_and_all_threads(self): | ||
pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)') | ||
|
||
|
||
class eval_frame_and_python_one_thread(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
expr.set_numexpr_threads(1) | ||
|
||
def time_eval_frame_and_python_one_thread(self): | ||
pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python') | ||
|
||
|
||
class eval_frame_and_python(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
|
||
def time_eval_frame_and_python(self): | ||
pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python') | ||
|
||
|
||
class eval_frame_chained_cmp_all_threads(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
|
||
def time_eval_frame_chained_cmp_all_threads(self): | ||
pd.eval('df < df2 < df3 < df4') | ||
|
||
|
||
class eval_frame_chained_cmp_python_one_thread(object): | ||
goal_time = 0.2 | ||
params = [['numexpr', 'python'], [1, 'all']] | ||
param_names = ['engine', 'threads'] | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
expr.set_numexpr_threads(1) | ||
|
||
def time_eval_frame_chained_cmp_python_one_thread(self): | ||
pd.eval('df < df2 < df3 < df4', engine='python') | ||
|
||
|
||
class eval_frame_chained_cmp_python(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
def setup(self, engine, threads): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
|
||
def time_eval_frame_chained_cmp_python(self): | ||
pd.eval('df < df2 < df3 < df4', engine='python') | ||
if threads == 1: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm maybe should set this explicitly each time something is run to 1 (then you can set in this particular one to higher) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure what would be a good value for threads=='all', could you explain how you'd like it. Note each benchmark is run in its own process, so those with threads!=1 get the default value on pandas startup. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, each is in its own process, then ok. I just didn't want to set a 'global', but if spinning off proc then it's fine. |
||
expr.set_numexpr_threads(1) | ||
|
||
def time_add(self, engine, threads): | ||
df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4 | ||
pd.eval('df + df2 + df3 + df4', engine=engine) | ||
|
||
class eval_frame_mult_all_threads(object): | ||
goal_time = 0.2 | ||
def time_and(self, engine, threads): | ||
df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4 | ||
pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine=engine) | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
|
||
def time_eval_frame_mult_all_threads(self): | ||
pd.eval('df * df2 * df3 * df4') | ||
|
||
|
||
class eval_frame_mult_one_thread(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
expr.set_numexpr_threads(1) | ||
|
||
def time_eval_frame_mult_one_thread(self): | ||
pd.eval('df * df2 * df3 * df4') | ||
|
||
|
||
class eval_frame_mult_python(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
|
||
def time_eval_frame_mult_python(self): | ||
pd.eval('df * df2 * df3 * df4', engine='python') | ||
|
||
|
||
class eval_frame_mult_python_one_thread(object): | ||
goal_time = 0.2 | ||
|
||
def setup(self): | ||
self.df = DataFrame(np.random.randn(20000, 100)) | ||
self.df2 = DataFrame(np.random.randn(20000, 100)) | ||
self.df3 = DataFrame(np.random.randn(20000, 100)) | ||
self.df4 = DataFrame(np.random.randn(20000, 100)) | ||
expr.set_numexpr_threads(1) | ||
def time_chained_cmp(self, engine, threads): | ||
df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4 | ||
pd.eval('df < df2 < df3 < df4', engine=engine) | ||
|
||
def time_eval_frame_mult_python_one_thread(self): | ||
pd.eval('df * df2 * df3 * df4', engine='python') | ||
def time_mult(self, engine, threads): | ||
df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4 | ||
pd.eval('df * df2 * df3 * df4', engine=engine) | ||
|
||
|
||
class query_datetime_index(object): | ||
|
@@ -203,6 +47,7 @@ def setup(self): | |
self.df = DataFrame({'a': np.random.randn(self.N), }, index=self.index) | ||
|
||
def time_query_datetime_index(self): | ||
ts = self.ts | ||
self.df.query('index < @ts') | ||
|
||
|
||
|
@@ -218,6 +63,7 @@ def setup(self): | |
self.df = DataFrame({'dates': self.s.values, }) | ||
|
||
def time_query_datetime_series(self): | ||
ts = self.ts | ||
self.df.query('dates < @ts') | ||
|
||
|
||
|
@@ -236,4 +82,5 @@ def setup(self): | |
self.max_val = self.df['a'].max() | ||
|
||
def time_query_with_boolean_selection(self): | ||
self.df.query('(a >= @min_val) & (a <= @max_val)') | ||
min_val, max_val = self.min_val, self.max_val | ||
self.df.query('(a >= @min_val) & (a <= @max_val)') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So how does one choose the installation method / use this? Ideally we would have examples for conda (default) / pip.