diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 0ce003d1a9277..478aba278029c 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -1,185 +1,119 @@ -from .pandas_vb_common import * +from datetime import datetime +import numpy as np +import pandas.util.testing as tm +from pandas import Series, date_range, NaT -class series_constructor_no_data_datetime_index(object): - goal_time = 0.2 - - def setup(self): - self.dr = pd.date_range( - start=datetime(2015,10,26), - end=datetime(2016,1,1), - freq='50s' - ) # ~100k long - - def time_series_constructor_no_data_datetime_index(self): - Series(data=None, index=self.dr) - - -class series_constructor_dict_data_datetime_index(object): - goal_time = 0.2 - - def setup(self): - self.dr = pd.date_range( - start=datetime(2015, 10, 26), - end=datetime(2016, 1, 1), - freq='50s' - ) # ~100k long - self.data = {d: v for d, v in zip(self.dr, range(len(self.dr)))} +from .pandas_vb_common import setup # noqa - def time_series_constructor_no_data_datetime_index(self): - Series(data=self.data, index=self.dr) +class SeriesConstructor(object): -class series_isin_int64(object): goal_time = 0.2 + params = [None, 'dict'] + param_names = ['data'] - def setup(self): - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.s4 = Series(np.random.randint(1, 100, 10000000)).astype('int64') - self.values = [1, 2] + def setup(self, data): + self.idx = date_range(start=datetime(2015, 10, 26), + end=datetime(2016, 1, 1), + freq='50s') + dict_data = dict(zip(self.idx, range(len(self.idx)))) + self.data = None if data is None else dict_data - def time_series_isin_int64(self): - self.s3.isin(self.values) + def time_constructor(self, data): + Series(data=self.data, index=self.idx) - def time_series_isin_int64_large(self): - self.s4.isin(self.values) +class IsIn(object): -class series_isin_object(object): goal_time = 0.2 + params = ['int64', 
'object'] + param_names = ['dtype'] - def setup(self): - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') + def setup(self, dtype): + self.s = Series(np.random.randint(1, 10, 100000)).astype(dtype) self.values = [1, 2] - self.s4 = self.s3.astype('object') - def time_series_isin_object(self): - self.s4.isin(self.values) + def time_isin(self, dtype): + self.s.isin(self.values) -class series_nlargest1(object): - goal_time = 0.2 - - def setup(self): - self.s1 = Series(np.random.randn(10000)) - self.s2 = Series(np.random.randint(1, 10, 10000)) - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.values = [1, 2] - self.s4 = self.s3.astype('object') - - def time_series_nlargest1(self): - self.s1.nlargest(3, keep='last') - self.s1.nlargest(3, keep='first') - +class NSort(object): -class series_nlargest2(object): goal_time = 0.2 + params = ['last', 'first'] + param_names = ['keep'] - def setup(self): - self.s1 = Series(np.random.randn(10000)) - self.s2 = Series(np.random.randint(1, 10, 10000)) - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.values = [1, 2] - self.s4 = self.s3.astype('object') - - def time_series_nlargest2(self): - self.s2.nlargest(3, keep='last') - self.s2.nlargest(3, keep='first') + def setup(self, keep): + self.s = Series(np.random.randint(1, 10, 100000)) + def time_nlargest(self, keep): + self.s.nlargest(3, keep=keep) -class series_nsmallest2(object): - goal_time = 0.2 + def time_nsmallest(self, keep): + self.s.nsmallest(3, keep=keep) - def setup(self): - self.s1 = Series(np.random.randn(10000)) - self.s2 = Series(np.random.randint(1, 10, 10000)) - self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64') - self.values = [1, 2] - self.s4 = self.s3.astype('object') - def time_series_nsmallest2(self): - self.s2.nsmallest(3, keep='last') - self.s2.nsmallest(3, keep='first') +class Dropna(object): - -class series_dropna_int64(object): goal_time = 0.2 - - def setup(self): - self.s 
= Series(np.random.randint(1, 10, 1000000)) - - def time_series_dropna_int64(self): + params = ['int', 'datetime'] + param_names = ['dtype'] + + def setup(self, dtype): + N = 10**6 + data = {'int': np.random.randint(1, 10, N), + 'datetime': date_range('2000-01-01', freq='S', periods=N)} + self.s = Series(data[dtype]) + if dtype == 'datetime': + self.s[np.random.randint(1, N, 100)] = NaT + + def time_dropna(self, dtype): self.s.dropna() -class series_dropna_datetime(object): - goal_time = 0.2 - - def setup(self): - self.s = Series(pd.date_range('2000-01-01', freq='S', periods=1000000)) - self.s[np.random.randint(1, 1000000, 100)] = pd.NaT - - def time_series_dropna_datetime(self): - self.s.dropna() - +class Map(object): -class series_map_dict(object): goal_time = 0.2 + params = ['dict', 'Series'] + param_names = ['mapper'] - def setup(self): + def setup(self, mapper): map_size = 1000 + map_data = Series(map_size - np.arange(map_size)) + self.map_data = map_data if mapper == 'Series' else map_data.to_dict() self.s = Series(np.random.randint(0, map_size, 10000)) - self.map_dict = {i: map_size - i for i in range(map_size)} - def time_series_map_dict(self): - self.s.map(self.map_dict) + def time_map(self, mapper): + self.s.map(self.map_data) -class series_map_series(object): - goal_time = 0.2 +class Clip(object): - def setup(self): - map_size = 1000 - self.s = Series(np.random.randint(0, map_size, 10000)) - self.map_series = Series(map_size - np.arange(map_size)) - - def time_series_map_series(self): - self.s.map(self.map_series) - - -class series_clip(object): goal_time = 0.2 def setup(self): - self.s = pd.Series(np.random.randn(50)) + self.s = Series(np.random.randn(50)) - def time_series_dropna_datetime(self): + def time_clip(self): self.s.clip(0, 1) -class series_value_counts(object): - goal_time = 0.2 +class ValueCounts(object): - def setup(self): - self.s = Series(np.random.randint(0, 1000, size=100000)) - self.s2 = self.s.astype(float) + goal_time = 0.2 + params = 
['int', 'float', 'object'] + param_names = ['dtype'] - self.K = 1000 - self.N = 100000 - self.uniques = tm.makeStringIndex(self.K).values - self.s3 = Series(np.tile(self.uniques, (self.N // self.K))) + def setup(self, dtype): + self.s = Series(np.random.randint(0, 1000, size=100000)).astype(dtype) - def time_value_counts_int64(self): + def time_value_counts(self, dtype): self.s.value_counts() - def time_value_counts_float64(self): - self.s2.value_counts() - - def time_value_counts_strings(self): - self.s.value_counts() +class Dir(object): -class series_dir(object): goal_time = 0.2 def setup(self):