CLN: clean benchmarks to get them running #16025

Merged
2 changes: 1 addition & 1 deletion asv_bench/asv.conf.json
@@ -26,7 +26,7 @@
// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
// "pythons": ["2.7", "3.4"],
"pythons": ["2.7"],
"pythons": ["3.6"],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
13 changes: 9 additions & 4 deletions asv_bench/benchmarks/algorithms.py
@@ -2,6 +2,11 @@
import pandas as pd
from pandas.util import testing as tm

try:
from pandas.tools.hashing import hash_pandas_object
except ImportError:
pass


class Algorithms(object):
goal_time = 0.2
@@ -103,13 +108,13 @@ def setup(self):
self.df.iloc[10:20] = np.nan

def time_frame(self):
self.df.hash()
hash_pandas_object(self.df)

def time_series_int(self):
self.df.E.hash()
hash_pandas_object(self.df.E)

def time_series_string(self):
self.df.B.hash()
hash_pandas_object(self.df.B)

def time_series_categorical(self):
self.df.C.hash()
hash_pandas_object(self.df.C)
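
The try/except around the import keeps this module importable on pandas versions that predate hash_pandas_object; if the import fails, the name is simply left undefined and asv records the hashing benchmarks as errors instead of refusing to collect the whole file. A minimal usage sketch (the pandas.util fallback is an assumption about later releases; at the time of this PR the function lives in pandas.tools.hashing):

import pandas as pd

try:
    from pandas.tools.hashing import hash_pandas_object   # location used by this PR
except ImportError:
    from pandas.util import hash_pandas_object            # assumed later location

df = pd.DataFrame({'A': range(5), 'B': list('abcde')})
print(hash_pandas_object(df))   # one uint64 hash per row, returned as a Series
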
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/frame_ctor.py
@@ -20,12 +20,12 @@ def setup(self):
self.data = self.frame.to_dict()
except:
self.data = self.frame.toDict()
self.some_dict = self.data.values()[0]
self.some_dict = list(self.data.values())[0]
self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values]

self.data2 = dict(
((i, dict(((j, float(j)) for j in range(100)))) for i in
xrange(2000)))
range(2000)))

def time_frame_ctor_list_of_dict(self):
DataFrame(self.dict_list)
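
Both edits in this file are Python 2/3 compatibility fixes rather than behaviour changes. A small sketch of why they are needed (the toy dict is illustrative only):

data = {'a': {0: 1.0}, 'b': {0: 2.0}}
some_dict = list(data.values())[0]   # dict.values() is a non-indexable view on Python 3
# some_dict = data.values()[0]       # works on Python 2, TypeError on Python 3

squares = dict((i, float(i)) for i in range(2000))   # xrange is gone; range is already lazy on Python 3
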
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/frame_methods.py
@@ -56,7 +56,7 @@ def time_reindex_both_axes_ix(self):
self.df.ix[(self.idx, self.idx)]

def time_reindex_upcast(self):
self.df2.reindex(permutation(range(1200)))
self.df2.reindex(np.random.permutation(range(1200)))


#----------------------------------------------------------------------
@@ -583,7 +583,7 @@ class frame_assign_timeseries_index(object):
goal_time = 0.2

def setup(self):
self.idx = date_range('1/1/2000', periods=100000, freq='D')
self.idx = date_range('1/1/2000', periods=100000, freq='H')
self.df = DataFrame(randn(100000, 1), columns=['A'], index=self.idx)

def time_frame_assign_timeseries_index(self):
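
Two things happen in this file: permutation is now spelled np.random.permutation rather than relying on a bare name star-imported from pandas_vb_common, and the index frequency drops from daily to hourly. The frequency change is what lets the benchmark run at all: 100000 daily periods starting in 2000 reach roughly the year 2273, past the upper bound of pandas' nanosecond datetime64 index (Timestamp.max falls in 2262), while 100000 hourly periods span only about 11 years. A hedged sketch of the failure mode:

import pandas as pd

pd.date_range('1/1/2000', periods=100000, freq='H')    # ~11.4 years, fits comfortably
# pd.date_range('1/1/2000', periods=100000, freq='D')  # would raise OutOfBoundsDatetime (past year 2262)
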
18 changes: 12 additions & 6 deletions asv_bench/benchmarks/gil.py
@@ -1,11 +1,17 @@
from .pandas_vb_common import *
from pandas.core import common as com

from pandas.core.algorithms import take_1d

try:
from cStringIO import StringIO
except ImportError:
from io import StringIO

try:
from pandas._libs import algos
except ImportError:
from pandas import algos

try:
from pandas.util.testing import test_parallel

@@ -167,11 +173,11 @@ def time_nogil_take1d_float64(self):

@test_parallel(num_threads=2)
def take_1d_pg2_int64(self):
com.take_1d(self.df.int64.values, self.indexer)
take_1d(self.df.int64.values, self.indexer)

@test_parallel(num_threads=2)
def take_1d_pg2_float64(self):
com.take_1d(self.df.float64.values, self.indexer)
take_1d(self.df.float64.values, self.indexer)


class nogil_take1d_int64(object):
@@ -193,11 +199,11 @@ def time_nogil_take1d_int64(self):

@test_parallel(num_threads=2)
def take_1d_pg2_int64(self):
com.take_1d(self.df.int64.values, self.indexer)
take_1d(self.df.int64.values, self.indexer)

@test_parallel(num_threads=2)
def take_1d_pg2_float64(self):
com.take_1d(self.df.float64.values, self.indexer)
take_1d(self.df.float64.values, self.indexer)


class nogil_kth_smallest(object):
Expand Down Expand Up @@ -226,7 +232,7 @@ class nogil_datetime_fields(object):

def setup(self):
self.N = 100000000
self.dti = pd.date_range('1900-01-01', periods=self.N, freq='D')
self.dti = pd.date_range('1900-01-01', periods=self.N, freq='T')
self.period = self.dti.to_period('D')
if (not have_real_test_parallel):
raise NotImplementedError
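
take_1d used to be reachable as pandas.core.common.take_1d but now has to come from pandas.core.algorithms, hence the import added at the top of this file. The cStringIO and pandas._libs try/excepts are the usual version shims, and the switch to freq='T' keeps the 100,000,000-period date_range inside datetime64[ns] bounds (the same issue as in frame_methods.py above). A minimal usage sketch, assuming the pandas 0.20 location of take_1d:

import numpy as np
from pandas.core.algorithms import take_1d   # pandas 0.20 location; older releases exposed it via pandas.core.common

arr = np.arange(10, dtype='float64')
indexer = np.array([3, 1, 4, 7])
print(take_1d(arr, indexer))                 # positions 3, 1, 4 and 7 of arr
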
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/groupby.py
@@ -331,7 +331,7 @@ def setup(self):

def get_test_data(self, ngroups=100, n=100000):
self.unique_groups = range(self.ngroups)
self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object)
self.arr = np.asarray(np.tile(self.unique_groups, int(n / self.ngroups)), dtype=object)
if (len(self.arr) < n):
self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object)
random.shuffle(self.arr)
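
On Python 3 the / operator always returns a float, and np.tile (like ndarray.repeat) rejects a float repetition count on recent NumPy, hence the explicit int(); n // self.ngroups would work equally well. A small sketch of the failure this avoids:

import numpy as np

n, ngroups = 100000, 100
arr = np.tile(range(ngroups), int(n / ngroups))   # OK: 1000 repetitions
# np.tile(range(ngroups), n / ngroups)            # TypeError on Python 3: float repetition count
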
48 changes: 23 additions & 25 deletions asv_bench/benchmarks/hdfstore_bench.py
@@ -31,16 +31,12 @@ def setup(self):
self.remove(self.f)

self.store = HDFStore(self.f)
self.store.put('df1', self.df)
self.store.put('df_mixed', self.df_mixed)

self.store.append('df5', self.df_mixed)
self.store.append('df7', self.df)

self.store.append('df9', self.df_wide)

self.store.append('df11', self.df_wide2)
self.store.append('df12', self.df2)
self.store.put('fixed', self.df)
self.store.put('fixed_mixed', self.df_mixed)
self.store.append('table', self.df2)
self.store.append('table_mixed', self.df_mixed)
self.store.append('table_wide', self.df_wide)
self.store.append('table_wide2', self.df_wide2)

def teardown(self):
self.store.close()
@@ -52,45 +48,47 @@ def remove(self, f):
pass

def time_read_store(self):
self.store.get('df1')
self.store.get('fixed')

def time_read_store_mixed(self):
self.store.get('df_mixed')
self.store.get('fixed_mixed')

def time_write_store(self):
self.store.put('df2', self.df)
self.store.put('fixed_write', self.df)

def time_write_store_mixed(self):
self.store.put('df_mixed2', self.df_mixed)
self.store.put('fixed_mixed_write', self.df_mixed)

def time_read_store_table_mixed(self):
self.store.select('df5')
self.store.select('table_mixed')

def time_write_store_table_mixed(self):
self.store.append('df6', self.df_mixed)
self.store.append('table_mixed_write', self.df_mixed)

def time_read_store_table(self):
self.store.select('df7')
self.store.select('table')

def time_write_store_table(self):
self.store.append('df8', self.df)
self.store.append('table_write', self.df)

def time_read_store_table_wide(self):
self.store.select('df9')
self.store.select('table_wide')

def time_write_store_table_wide(self):
self.store.append('df10', self.df_wide)
self.store.append('table_wide_write', self.df_wide)

def time_write_store_table_dc(self):
self.store.append('df15', self.df, data_columns=True)
self.store.append('table_dc_write', self.df_dc, data_columns=True)

def time_query_store_table_wide(self):
self.store.select('df11', [('index', '>', self.df_wide2.index[10000]),
('index', '<', self.df_wide2.index[15000])])
start = self.df_wide2.index[10000]
stop = self.df_wide2.index[15000]
self.store.select('table_wide', where="index > start and index < stop")

def time_query_store_table(self):
self.store.select('df12', [('index', '>', self.df2.index[10000]),
('index', '<', self.df2.index[15000])])
start = self.df2.index[10000]
stop = self.df2.index[15000]
self.store.select('table', where="index > start and index < stop")
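
Besides renaming the store keys to say what they hold (fixed vs. table format, wide and data-column variants), the query benchmarks now pass HDFStore.select a where string; names like start and stop in that string are resolved against local variables in the calling frame, which is exactly what the two methods above rely on. A minimal sketch, assuming PyTables is installed (the file name and key are made up for illustration):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(1000, 2), columns=['A', 'B'],
                  index=pd.date_range('2000-01-01', periods=1000, freq='T'))

with pd.HDFStore('demo.h5', mode='w') as store:
    store.append('table', df)          # table format, so it can be queried
    start = df.index[100]
    stop = df.index[200]
    subset = store.select('table', where="index > start and index < stop")
    print(len(subset))                 # 99 rows strictly between the two bounds
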


class HDF5Panel(object):
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/inference.py
@@ -113,5 +113,5 @@ def setup(self):
self.na_values = set()

def time_convert(self):
pd.lib.maybe_convert_numeric(self.data, self.na_values,
coerce_numeric=False)
lib.maybe_convert_numeric(self.data, self.na_values,
coerce_numeric=False)
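
The bare lib name is the module bound by the fallback loop in pandas_vb_common.py (preferring pandas._libs.lib on pandas 0.20+), replacing the pd.lib alias that is headed for deprecation. Roughly what the call does on the pandas version this PR targets (maybe_convert_numeric is private and its signature has changed in later releases, so treat this as a sketch):

import numpy as np
from pandas._libs import lib     # pandas 0.20+ location; the benchmarks fall back to pandas.lib on older releases

data = np.array(['1', '2', '3.5', '-7'], dtype=object)
out = lib.maybe_convert_numeric(data, set(), coerce_numeric=False)
print(out.dtype)                 # float64: the numeric strings were parsed
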
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/join_merge.py
@@ -314,12 +314,12 @@ def setup(self):

self.df1 = pd.DataFrame(
{'time': np.random.randint(0, one_count / 20, one_count),
'key': np.random.choice(list(string.uppercase), one_count),
'key': np.random.choice(list(string.ascii_uppercase), one_count),
'key2': np.random.randint(0, 25, one_count),
'value1': np.random.randn(one_count)})
self.df2 = pd.DataFrame(
{'time': np.random.randint(0, two_count / 20, two_count),
'key': np.random.choice(list(string.uppercase), two_count),
'key': np.random.choice(list(string.ascii_uppercase), two_count),
'key2': np.random.randint(0, 25, two_count),
'value2': np.random.randn(two_count)})
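
string.uppercase only ever existed on Python 2 (and was locale-dependent there); string.ascii_uppercase is available on both major versions and always means A-Z. For example:

import string
import numpy as np

keys = np.random.choice(list(string.ascii_uppercase), 10)   # e.g. array(['K', 'B', ...], dtype='<U1')
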

16 changes: 9 additions & 7 deletions asv_bench/benchmarks/packers.py
@@ -153,18 +153,20 @@ def time_packers_read_stata_with_validation(self):
class packers_read_sas(_Packers):

def setup(self):
self.f = os.path.join(os.path.dirname(__file__), '..', '..',
'pandas', 'io', 'tests', 'sas', 'data',
'test1.sas7bdat')
self.f2 = os.path.join(os.path.dirname(__file__), '..', '..',
'pandas', 'io', 'tests', 'sas', 'data',
'paxraw_d_short.xpt')

testdir = os.path.join(os.path.dirname(__file__), '..', '..',
'pandas', 'tests', 'io', 'sas')
if not os.path.exists(testdir):
testdir = os.path.join(os.path.dirname(__file__), '..', '..',
'pandas', 'io', 'tests', 'sas')
self.f = os.path.join(testdir, 'data', 'test1.sas7bdat')
self.f2 = os.path.join(testdir, 'data', 'paxraw_d_short.xpt')

def time_read_sas7bdat(self):
pd.read_sas(self.f, format='sas7bdat')

def time_read_xport(self):
pd.read_sas(self.f, format='xport')
pd.read_sas(self.f2, format='xport')
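
The os.path.exists probe is needed because the SAS test files moved from pandas/io/tests/sas to pandas/tests/io/sas around pandas 0.20, and asv runs this same benchmark file against several pandas checkouts; the xport benchmark also now reads self.f2 rather than the sas7bdat file. A hypothetical helper illustrating the same probing idea (first_existing is not part of the benchmark suite):

import os

def first_existing(*candidates):
    """Return the first path that exists, else the last candidate."""
    for path in candidates:
        if os.path.exists(path):
            return path
    return candidates[-1]

base = os.path.dirname(__file__)
testdir = first_existing(
    os.path.join(base, '..', '..', 'pandas', 'tests', 'io', 'sas'),   # new layout
    os.path.join(base, '..', '..', 'pandas', 'io', 'tests', 'sas'),   # old layout
)
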


class CSV(_Packers):
4 changes: 1 addition & 3 deletions asv_bench/benchmarks/pandas_vb_common.py
@@ -1,9 +1,7 @@
from pandas import *
import pandas as pd
from datetime import timedelta
from numpy.random import randn
from numpy.random import randint
from numpy.random import permutation
import pandas.util.testing as tm
import random
import numpy as np
@@ -18,7 +16,7 @@
np.random.seed(1234)

# try em until it works!
for imp in ['pandas_tseries', 'pandas.lib', 'pandas._libs.lib']:
for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:
try:
lib = import_module(imp)
break
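
Reordering the candidates so pandas._libs.lib comes first simply lets the modern location win when it exists; import_module raises ImportError for candidates that are missing and the loop moves on. A stripped-down sketch of the idiom (the except clause hidden by the diff is assumed to just continue):

from importlib import import_module

lib = None
for name in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:
    try:
        lib = import_module(name)
        break
    except ImportError:
        continue
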
5 changes: 3 additions & 2 deletions asv_bench/benchmarks/panel_ctor.py
@@ -1,4 +1,5 @@
from .pandas_vb_common import *
from datetime import timedelta


class Constructors1(object):
@@ -24,7 +25,7 @@ class Constructors2(object):
def setup(self):
self.data_frames = {}
for x in range(100):
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1)))
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D'))
self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr)
self.data_frames[x] = self.df

@@ -36,7 +37,7 @@ class Constructors3(object):
goal_time = 0.2

def setup(self):
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1)))
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D'))
self.data_frames = {}
for x in range(100):
self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr)
2 changes: 0 additions & 2 deletions asv_bench/benchmarks/replace.py
@@ -1,6 +1,4 @@
from .pandas_vb_common import *
from pandas.compat import range
from datetime import timedelta


class replace_fillna(object):
3 changes: 1 addition & 2 deletions asv_bench/benchmarks/timeseries.py
@@ -4,7 +4,6 @@
from pandas.tseries.converter import DatetimeConverter
from .pandas_vb_common import *
import pandas as pd
from datetime import timedelta
import datetime as dt
try:
import pandas.tseries.holiday
@@ -57,7 +56,7 @@ def setup(self):
self.a = self.rng7[:50000].append(self.rng7[50002:])

def time_add_timedelta(self):
(self.rng + timedelta(minutes=2))
(self.rng + dt.timedelta(minutes=2))

def time_add_offset_delta(self):
(self.rng + self.delta_offset)
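
With the bare timedelta name no longer imported here (and apparently no longer re-exported by pandas_vb_common), the benchmark reaches it through the datetime module it already imports as dt; the arithmetic is unchanged. A quick sketch of the equivalent spellings:

import datetime as dt
import pandas as pd

rng = pd.date_range('2000-01-01', periods=5, freq='T')
shifted = rng + dt.timedelta(minutes=2)                  # what the benchmark does
assert shifted.equals(rng + pd.Timedelta(minutes=2))     # pandas' own Timedelta gives the same result
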