Skip to content

CLN: ASV series_methods #19046

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 3, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 64 additions & 130 deletions asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
@@ -1,185 +1,119 @@
from .pandas_vb_common import *
from datetime import datetime

import numpy as np
import pandas.util.testing as tm
from pandas import Series, date_range, NaT

class series_constructor_no_data_datetime_index(object):
goal_time = 0.2

def setup(self):
self.dr = pd.date_range(
start=datetime(2015,10,26),
end=datetime(2016,1,1),
freq='50s'
) # ~100k long

def time_series_constructor_no_data_datetime_index(self):
Series(data=None, index=self.dr)


class series_constructor_dict_data_datetime_index(object):
goal_time = 0.2

def setup(self):
self.dr = pd.date_range(
start=datetime(2015, 10, 26),
end=datetime(2016, 1, 1),
freq='50s'
) # ~100k long
self.data = {d: v for d, v in zip(self.dr, range(len(self.dr)))}
from .pandas_vb_common import setup # noqa

def time_series_constructor_no_data_datetime_index(self):
Series(data=self.data, index=self.dr)

class SeriesConstructor(object):

class series_isin_int64(object):
goal_time = 0.2
params = [None, 'dict']
param_names = ['data']

def setup(self):
self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64')
self.s4 = Series(np.random.randint(1, 100, 10000000)).astype('int64')
self.values = [1, 2]
def setup(self, data):
self.idx = date_range(start=datetime(2015, 10, 26),
end=datetime(2016, 1, 1),
freq='50s')
dict_data = dict(zip(self.idx, range(len(self.idx))))
self.data = None if data is None else dict_data

def time_series_isin_int64(self):
self.s3.isin(self.values)
def time_constructor(self, data):
Series(data=self.data, index=self.idx)

def time_series_isin_int64_large(self):
self.s4.isin(self.values)

class IsIn(object):

class series_isin_object(object):
goal_time = 0.2
params = ['int64', 'object']
param_names = ['dtype']

def setup(self):
self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64')
def setup(self, dtype):
self.s = Series(np.random.randint(1, 10, 100000)).astype(dtype)
self.values = [1, 2]
self.s4 = self.s3.astype('object')

def time_series_isin_object(self):
self.s4.isin(self.values)
def time_isin(self, dtypes):
self.s.isin(self.values)


class series_nlargest1(object):
goal_time = 0.2

def setup(self):
self.s1 = Series(np.random.randn(10000))
self.s2 = Series(np.random.randint(1, 10, 10000))
self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64')
self.values = [1, 2]
self.s4 = self.s3.astype('object')

def time_series_nlargest1(self):
self.s1.nlargest(3, keep='last')
self.s1.nlargest(3, keep='first')

class NSort(object):

class series_nlargest2(object):
goal_time = 0.2
params = ['last', 'first']
param_names = ['keep']

def setup(self):
self.s1 = Series(np.random.randn(10000))
self.s2 = Series(np.random.randint(1, 10, 10000))
self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64')
self.values = [1, 2]
self.s4 = self.s3.astype('object')

def time_series_nlargest2(self):
self.s2.nlargest(3, keep='last')
self.s2.nlargest(3, keep='first')
def setup(self, keep):
self.s = Series(np.random.randint(1, 10, 100000))

def time_nlargest(self, keep):
self.s.nlargest(3, keep=keep)

class series_nsmallest2(object):
goal_time = 0.2
def time_nsmallest(self, keep):
self.s.nsmallest(3, keep=keep)

def setup(self):
self.s1 = Series(np.random.randn(10000))
self.s2 = Series(np.random.randint(1, 10, 10000))
self.s3 = Series(np.random.randint(1, 10, 100000)).astype('int64')
self.values = [1, 2]
self.s4 = self.s3.astype('object')

def time_series_nsmallest2(self):
self.s2.nsmallest(3, keep='last')
self.s2.nsmallest(3, keep='first')
class Dropna(object):


class series_dropna_int64(object):
goal_time = 0.2

def setup(self):
self.s = Series(np.random.randint(1, 10, 1000000))

def time_series_dropna_int64(self):
params = ['int', 'datetime']
param_names = ['dtype']

def setup(self, dtype):
N = 10**6
data = {'int': np.random.randint(1, 10, N),
'datetime': date_range('2000-01-01', freq='S', periods=N)}
self.s = Series(data[dtype])
if dtype == 'datetime':
self.s[np.random.randint(1, N, 100)] = NaT

def time_dropna(self, dtype):
self.s.dropna()


class series_dropna_datetime(object):
goal_time = 0.2

def setup(self):
self.s = Series(pd.date_range('2000-01-01', freq='S', periods=1000000))
self.s[np.random.randint(1, 1000000, 100)] = pd.NaT

def time_series_dropna_datetime(self):
self.s.dropna()

class Map(object):

class series_map_dict(object):
goal_time = 0.2
params = ['dict', 'Series']
param_names = 'mapper'

def setup(self):
def setup(self, mapper):
map_size = 1000
map_data = Series(map_size - np.arange(map_size))
self.map_data = map_data if mapper == 'Series' else map_data.to_dict()
self.s = Series(np.random.randint(0, map_size, 10000))
self.map_dict = {i: map_size - i for i in range(map_size)}

def time_series_map_dict(self):
self.s.map(self.map_dict)
def time_map(self, mapper):
self.s.map(self.map_data)


class series_map_series(object):
goal_time = 0.2
class Clip(object):

def setup(self):
map_size = 1000
self.s = Series(np.random.randint(0, map_size, 10000))
self.map_series = Series(map_size - np.arange(map_size))

def time_series_map_series(self):
self.s.map(self.map_series)


class series_clip(object):
goal_time = 0.2

def setup(self):
self.s = pd.Series(np.random.randn(50))
self.s = Series(np.random.randn(50))

def time_series_dropna_datetime(self):
def time_clip(self):
self.s.clip(0, 1)


class series_value_counts(object):
goal_time = 0.2
class ValueCounts(object):

def setup(self):
self.s = Series(np.random.randint(0, 1000, size=100000))
self.s2 = self.s.astype(float)
goal_time = 0.2
params = ['int', 'float', 'object']
param_names = ['dtype']

self.K = 1000
self.N = 100000
self.uniques = tm.makeStringIndex(self.K).values
self.s3 = Series(np.tile(self.uniques, (self.N // self.K)))
def setup(self, dtype):
self.s = Series(np.random.randint(0, 1000, size=100000)).astype(dtype)

def time_value_counts_int64(self):
def time_value_counts(self, dtype):
self.s.value_counts()

def time_value_counts_float64(self):
self.s2.value_counts()

def time_value_counts_strings(self):
self.s.value_counts()

class Dir(object):

class series_dir(object):
goal_time = 0.2

def setup(self):
Expand Down