diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 4fc6f9f634426..62f1c090a7462 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -26,7 +26,7 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.4"], - "pythons": ["2.7"], + "pythons": ["3.6"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index fe657936c403e..0e2182c58d44c 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -2,6 +2,11 @@ import pandas as pd from pandas.util import testing as tm +try: + from pandas.tools.hashing import hash_pandas_object +except ImportError: + pass + class Algorithms(object): goal_time = 0.2 @@ -103,13 +108,13 @@ def setup(self): self.df.iloc[10:20] = np.nan def time_frame(self): - self.df.hash() + hash_pandas_object(self.df) def time_series_int(self): - self.df.E.hash() + hash_pandas_object(self.df.E) def time_series_string(self): - self.df.B.hash() + hash_pandas_object(self.df.B) def time_series_categorical(self): - self.df.C.hash() + hash_pandas_object(self.df.C) diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index 05c1a27fdf8ca..dec4fcba0eb5e 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -20,12 +20,12 @@ def setup(self): self.data = self.frame.to_dict() except: self.data = self.frame.toDict() - self.some_dict = self.data.values()[0] + self.some_dict = list(self.data.values())[0] self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values] self.data2 = dict( ((i, dict(((j, float(j)) for j in range(100)))) for i in - xrange(2000))) + range(2000))) def time_frame_ctor_list_of_dict(self): DataFrame(self.dict_list) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 9f491302a4d6f..af72ca1e9a6ab 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -56,7 +56,7 @@ def time_reindex_both_axes_ix(self): self.df.ix[(self.idx, self.idx)] def time_reindex_upcast(self): - self.df2.reindex(permutation(range(1200))) + self.df2.reindex(np.random.permutation(range(1200))) #---------------------------------------------------------------------- @@ -583,7 +583,7 @@ class frame_assign_timeseries_index(object): goal_time = 0.2 def setup(self): - self.idx = date_range('1/1/2000', periods=100000, freq='D') + self.idx = date_range('1/1/2000', periods=100000, freq='H') self.df = DataFrame(randn(100000, 1), columns=['A'], index=self.idx) def time_frame_assign_timeseries_index(self): diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 1c5e59672cb57..78a94976e732d 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,11 +1,17 @@ from .pandas_vb_common import * -from pandas.core import common as com + +from pandas.core.algorithms import take_1d try: from cStringIO import StringIO except ImportError: from io import StringIO +try: + from pandas._libs import algos +except ImportError: + from pandas import algos + try: from pandas.util.testing import test_parallel @@ -167,11 +173,11 @@ def time_nogil_take1d_float64(self): @test_parallel(num_threads=2) def take_1d_pg2_int64(self): - com.take_1d(self.df.int64.values, self.indexer) + take_1d(self.df.int64.values, self.indexer) @test_parallel(num_threads=2) def take_1d_pg2_float64(self): - com.take_1d(self.df.float64.values, self.indexer) + take_1d(self.df.float64.values, self.indexer) class nogil_take1d_int64(object): @@ -193,11 +199,11 @@ def time_nogil_take1d_int64(self): @test_parallel(num_threads=2) def take_1d_pg2_int64(self): - com.take_1d(self.df.int64.values, self.indexer) + take_1d(self.df.int64.values, self.indexer) @test_parallel(num_threads=2) def take_1d_pg2_float64(self): - com.take_1d(self.df.float64.values, self.indexer) + take_1d(self.df.float64.values, self.indexer) class nogil_kth_smallest(object): @@ -226,7 +232,7 @@ class nogil_datetime_fields(object): def setup(self): self.N = 100000000 - self.dti = pd.date_range('1900-01-01', periods=self.N, freq='D') + self.dti = pd.date_range('1900-01-01', periods=self.N, freq='T') self.period = self.dti.to_period('D') if (not have_real_test_parallel): raise NotImplementedError diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index b8d8e8b7912d7..c0c3a42cc4464 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -331,7 +331,7 @@ def setup(self): def get_test_data(self, ngroups=100, n=100000): self.unique_groups = range(self.ngroups) - self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object) + self.arr = np.asarray(np.tile(self.unique_groups, int(n / self.ngroups)), dtype=object) if (len(self.arr) < n): self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object) random.shuffle(self.arr) diff --git a/asv_bench/benchmarks/hdfstore_bench.py b/asv_bench/benchmarks/hdfstore_bench.py index 78de5267a2969..dc72f3d548aaf 100644 --- a/asv_bench/benchmarks/hdfstore_bench.py +++ b/asv_bench/benchmarks/hdfstore_bench.py @@ -31,16 +31,12 @@ def setup(self): self.remove(self.f) self.store = HDFStore(self.f) - self.store.put('df1', self.df) - self.store.put('df_mixed', self.df_mixed) - - self.store.append('df5', self.df_mixed) - self.store.append('df7', self.df) - - self.store.append('df9', self.df_wide) - - self.store.append('df11', self.df_wide2) - self.store.append('df12', self.df2) + self.store.put('fixed', self.df) + self.store.put('fixed_mixed', self.df_mixed) + self.store.append('table', self.df2) + self.store.append('table_mixed', self.df_mixed) + self.store.append('table_wide', self.df_wide) + self.store.append('table_wide2', self.df_wide2) def teardown(self): self.store.close() @@ -52,45 +48,47 @@ def remove(self, f): pass def time_read_store(self): - self.store.get('df1') + self.store.get('fixed') def time_read_store_mixed(self): - self.store.get('df_mixed') + self.store.get('fixed_mixed') def time_write_store(self): - self.store.put('df2', self.df) + self.store.put('fixed_write', self.df) def time_write_store_mixed(self): - self.store.put('df_mixed2', self.df_mixed) + self.store.put('fixed_mixed_write', self.df_mixed) def time_read_store_table_mixed(self): - self.store.select('df5') + self.store.select('table_mixed') def time_write_store_table_mixed(self): - self.store.append('df6', self.df_mixed) + self.store.append('table_mixed_write', self.df_mixed) def time_read_store_table(self): - self.store.select('df7') + self.store.select('table') def time_write_store_table(self): - self.store.append('df8', self.df) + self.store.append('table_write', self.df) def time_read_store_table_wide(self): - self.store.select('df9') + self.store.select('table_wide') def time_write_store_table_wide(self): - self.store.append('df10', self.df_wide) + self.store.append('table_wide_write', self.df_wide) def time_write_store_table_dc(self): - self.store.append('df15', self.df, data_columns=True) + self.store.append('table_dc_write', self.df_dc, data_columns=True) def time_query_store_table_wide(self): - self.store.select('df11', [('index', '>', self.df_wide2.index[10000]), - ('index', '<', self.df_wide2.index[15000])]) + start = self.df_wide2.index[10000] + stop = self.df_wide2.index[15000] + self.store.select('table_wide', where="index > start and index < stop") def time_query_store_table(self): - self.store.select('df12', [('index', '>', self.df2.index[10000]), - ('index', '<', self.df2.index[15000])]) + start = self.df2.index[10000] + stop = self.df2.index[15000] + self.store.select('table', where="index > start and index < stop") class HDF5Panel(object): diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 3635438a7f76b..dc1d6de73f8ae 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -113,5 +113,5 @@ def setup(self): self.na_values = set() def time_convert(self): - pd.lib.maybe_convert_numeric(self.data, self.na_values, - coerce_numeric=False) + lib.maybe_convert_numeric(self.data, self.na_values, + coerce_numeric=False) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 776316343e009..3b0e33b72ddc1 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -314,12 +314,12 @@ def setup(self): self.df1 = pd.DataFrame( {'time': np.random.randint(0, one_count / 20, one_count), - 'key': np.random.choice(list(string.uppercase), one_count), + 'key': np.random.choice(list(string.ascii_uppercase), one_count), 'key2': np.random.randint(0, 25, one_count), 'value1': np.random.randn(one_count)}) self.df2 = pd.DataFrame( {'time': np.random.randint(0, two_count / 20, two_count), - 'key': np.random.choice(list(string.uppercase), two_count), + 'key': np.random.choice(list(string.ascii_uppercase), two_count), 'key2': np.random.randint(0, 25, two_count), 'value2': np.random.randn(two_count)}) diff --git a/asv_bench/benchmarks/packers.py b/asv_bench/benchmarks/packers.py index cd43e305ead8f..24f80cc836dd4 100644 --- a/asv_bench/benchmarks/packers.py +++ b/asv_bench/benchmarks/packers.py @@ -153,18 +153,20 @@ def time_packers_read_stata_with_validation(self): class packers_read_sas(_Packers): def setup(self): - self.f = os.path.join(os.path.dirname(__file__), '..', '..', - 'pandas', 'io', 'tests', 'sas', 'data', - 'test1.sas7bdat') - self.f2 = os.path.join(os.path.dirname(__file__), '..', '..', - 'pandas', 'io', 'tests', 'sas', 'data', - 'paxraw_d_short.xpt') + + testdir = os.path.join(os.path.dirname(__file__), '..', '..', + 'pandas', 'tests', 'io', 'sas') + if not os.path.exists(testdir): + testdir = os.path.join(os.path.dirname(__file__), '..', '..', + 'pandas', 'io', 'tests', 'sas') + self.f = os.path.join(testdir, 'data', 'test1.sas7bdat') + self.f2 = os.path.join(testdir, 'data', 'paxraw_d_short.xpt') def time_read_sas7bdat(self): pd.read_sas(self.f, format='sas7bdat') def time_read_xport(self): - pd.read_sas(self.f, format='xport') + pd.read_sas(self.f2, format='xport') class CSV(_Packers): diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index 56ccc94c414fb..b1a58e49fe86c 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -1,9 +1,7 @@ from pandas import * import pandas as pd -from datetime import timedelta from numpy.random import randn from numpy.random import randint -from numpy.random import permutation import pandas.util.testing as tm import random import numpy as np @@ -18,7 +16,7 @@ np.random.seed(1234) # try em until it works! -for imp in ['pandas_tseries', 'pandas.lib', 'pandas._libs.lib']: +for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']: try: lib = import_module(imp) break diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index faedce6c574ec..cc6071b054662 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,4 +1,5 @@ from .pandas_vb_common import * +from datetime import timedelta class Constructors1(object): @@ -24,7 +25,7 @@ class Constructors2(object): def setup(self): self.data_frames = {} for x in range(100): - self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1))) + self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D')) self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) self.data_frames[x] = self.df @@ -36,7 +37,7 @@ class Constructors3(object): goal_time = 0.2 def setup(self): - self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1))) + self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D')) self.data_frames = {} for x in range(100): self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 66b8af53801ac..63562f90eab2b 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -1,6 +1,4 @@ from .pandas_vb_common import * -from pandas.compat import range -from datetime import timedelta class replace_fillna(object): diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index b63b3386a7563..f5ea4d7875931 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -4,7 +4,6 @@ from pandas.tseries.converter import DatetimeConverter from .pandas_vb_common import * import pandas as pd -from datetime import timedelta import datetime as dt try: import pandas.tseries.holiday @@ -57,7 +56,7 @@ def setup(self): self.a = self.rng7[:50000].append(self.rng7[50002:]) def time_add_timedelta(self): - (self.rng + timedelta(minutes=2)) + (self.rng + dt.timedelta(minutes=2)) def time_add_offset_delta(self): (self.rng + self.delta_offset)