From 09548b3fc39d393cbdae8390855ded55a7caba18 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 7 Dec 2017 23:12:46 -0800 Subject: [PATCH 1/3] CLN: ASV Rolling Benchmark --- asv_bench/benchmarks/rolling.py | 199 +++++--------------------------- 1 file changed, 27 insertions(+), 172 deletions(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 899349cd21f84..ad9dfb5a845fb 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -1,185 +1,40 @@ -from .pandas_vb_common import * import pandas as pd import numpy as np +from .pandas_vb_common import setup -class DataframeRolling(object): - goal_time = 0.2 - def setup(self): - self.N = 100000 - self.Ns = 10000 - self.df = pd.DataFrame({'a': np.random.random(self.N)}) - self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)}) - self.wins = 10 - self.winl = 1000 +class Methods(object): - def time_rolling_quantile_0(self): - (self.df.rolling(self.wins).quantile(0.0)) + sample_time = 0.2 + params = (['DataFrame', 'Series'], + [10, 1000], + [10**4, 10**5], + ['int', 'float'], + ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt', + 'sum', 'corr', 'cov']) + param_names = ['contructor', 'window', 'num_data', 'dtype', 'method'] - def time_rolling_quantile_1(self): - (self.df.rolling(self.wins).quantile(1.0)) + def setup(self, contructor, window, num_data, dtype, method): + arr = np.random.random(num_data).astype(dtype) + self.data = getattr(pd, contructor)(arr) - def time_rolling_quantile_median(self): - (self.df.rolling(self.wins).quantile(0.5)) + def time_rolling(self, contructor, window, num_data, dtype, method): + getattr(self.data.rolling(window), method)() - def time_rolling_median(self): - (self.df.rolling(self.wins).median()) - def time_rolling_mean(self): - (self.df.rolling(self.wins).mean()) +class Quantile(object): - def time_rolling_max(self): - (self.df.rolling(self.wins).max()) + sample_time = 0.2 + params = (['DataFrame', 'Series'], + [10, 1000], + [10**4, 10**5], + [0, 0.5, 1]) + param_names = ['contructor', 'window', 'num_data', 'dtype', 'percentile'] - def time_rolling_min(self): - (self.df.rolling(self.wins).min()) + def setup(self, contructor, window, num_data, dtype, percentile): + arr = np.random.random(num_data).astype(dtype) + self.data = getattr(pd, contructor)(arr) - def time_rolling_std(self): - (self.df.rolling(self.wins).std()) - - def time_rolling_count(self): - (self.df.rolling(self.wins).count()) - - def time_rolling_skew(self): - (self.df.rolling(self.wins).skew()) - - def time_rolling_kurt(self): - (self.df.rolling(self.wins).kurt()) - - def time_rolling_sum(self): - (self.df.rolling(self.wins).sum()) - - def time_rolling_corr(self): - (self.dfs.rolling(self.wins).corr()) - - def time_rolling_cov(self): - (self.dfs.rolling(self.wins).cov()) - - def time_rolling_quantile_0_l(self): - (self.df.rolling(self.winl).quantile(0.0)) - - def time_rolling_quantile_1_l(self): - (self.df.rolling(self.winl).quantile(1.0)) - - def time_rolling_quantile_median_l(self): - (self.df.rolling(self.winl).quantile(0.5)) - - def time_rolling_median_l(self): - (self.df.rolling(self.winl).median()) - - def time_rolling_mean_l(self): - (self.df.rolling(self.winl).mean()) - - def time_rolling_max_l(self): - (self.df.rolling(self.winl).max()) - - def time_rolling_min_l(self): - (self.df.rolling(self.winl).min()) - - def time_rolling_std_l(self): - (self.df.rolling(self.wins).std()) - - def time_rolling_count_l(self): - (self.df.rolling(self.wins).count()) - - def time_rolling_skew_l(self): - (self.df.rolling(self.wins).skew()) - - def time_rolling_kurt_l(self): - (self.df.rolling(self.wins).kurt()) - - def time_rolling_sum_l(self): - (self.df.rolling(self.wins).sum()) - - -class SeriesRolling(object): - goal_time = 0.2 - - def setup(self): - self.N = 100000 - self.Ns = 10000 - self.df = pd.DataFrame({'a': np.random.random(self.N)}) - self.dfs = pd.DataFrame({'a': np.random.random(self.Ns)}) - self.sr = self.df.a - self.srs = self.dfs.a - self.wins = 10 - self.winl = 1000 - - def time_rolling_quantile_0(self): - (self.sr.rolling(self.wins).quantile(0.0)) - - def time_rolling_quantile_1(self): - (self.sr.rolling(self.wins).quantile(1.0)) - - def time_rolling_quantile_median(self): - (self.sr.rolling(self.wins).quantile(0.5)) - - def time_rolling_median(self): - (self.sr.rolling(self.wins).median()) - - def time_rolling_mean(self): - (self.sr.rolling(self.wins).mean()) - - def time_rolling_max(self): - (self.sr.rolling(self.wins).max()) - - def time_rolling_min(self): - (self.sr.rolling(self.wins).min()) - - def time_rolling_std(self): - (self.sr.rolling(self.wins).std()) - - def time_rolling_count(self): - (self.sr.rolling(self.wins).count()) - - def time_rolling_skew(self): - (self.sr.rolling(self.wins).skew()) - - def time_rolling_kurt(self): - (self.sr.rolling(self.wins).kurt()) - - def time_rolling_sum(self): - (self.sr.rolling(self.wins).sum()) - - def time_rolling_corr(self): - (self.srs.rolling(self.wins).corr()) - - def time_rolling_cov(self): - (self.srs.rolling(self.wins).cov()) - - def time_rolling_quantile_0_l(self): - (self.sr.rolling(self.winl).quantile(0.0)) - - def time_rolling_quantile_1_l(self): - (self.sr.rolling(self.winl).quantile(1.0)) - - def time_rolling_quantile_median_l(self): - (self.sr.rolling(self.winl).quantile(0.5)) - - def time_rolling_median_l(self): - (self.sr.rolling(self.winl).median()) - - def time_rolling_mean_l(self): - (self.sr.rolling(self.winl).mean()) - - def time_rolling_max_l(self): - (self.sr.rolling(self.winl).max()) - - def time_rolling_min_l(self): - (self.sr.rolling(self.winl).min()) - - def time_rolling_std_l(self): - (self.sr.rolling(self.wins).std()) - - def time_rolling_count_l(self): - (self.sr.rolling(self.wins).count()) - - def time_rolling_skew_l(self): - (self.sr.rolling(self.wins).skew()) - - def time_rolling_kurt_l(self): - (self.sr.rolling(self.wins).kurt()) - - def time_rolling_sum_l(self): - (self.sr.rolling(self.wins).sum()) + def time_quantile(self, contructor, window, num_data, dtype, percentile): + self.data.rolling(window).quantile(0.5) From bbb33bcd0cdd85e8906efb287d60f6104907c6fe Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Dec 2017 19:28:54 -0800 Subject: [PATCH 2/3] additional cleanup --- asv_bench/benchmarks/rolling.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index ad9dfb5a845fb..45142c53dcd01 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -1,7 +1,7 @@ import pandas as pd import numpy as np -from .pandas_vb_common import setup +from .pandas_vb_common import setup # noqa class Methods(object): @@ -9,18 +9,18 @@ class Methods(object): sample_time = 0.2 params = (['DataFrame', 'Series'], [10, 1000], - [10**4, 10**5], ['int', 'float'], ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt', 'sum', 'corr', 'cov']) - param_names = ['contructor', 'window', 'num_data', 'dtype', 'method'] + param_names = ['contructor', 'window', 'dtype', 'method'] - def setup(self, contructor, window, num_data, dtype, method): - arr = np.random.random(num_data).astype(dtype) - self.data = getattr(pd, contructor)(arr) + def setup(self, contructor, window, dtype, method): + N = 10**5 + arr = np.random.random(N).astype(dtype) + self.roll = getattr(pd, contructor)(arr).rolling(window) - def time_rolling(self, contructor, window, num_data, dtype, method): - getattr(self.data.rolling(window), method)() + def time_rolling(self, contructor, window, dtype, method): + getattr(self.roll, method)() class Quantile(object): @@ -28,13 +28,14 @@ class Quantile(object): sample_time = 0.2 params = (['DataFrame', 'Series'], [10, 1000], - [10**4, 10**5], + ['int', 'float'], [0, 0.5, 1]) - param_names = ['contructor', 'window', 'num_data', 'dtype', 'percentile'] + param_names = ['contructor', 'window', 'dtype', 'percentile'] - def setup(self, contructor, window, num_data, dtype, percentile): - arr = np.random.random(num_data).astype(dtype) - self.data = getattr(pd, contructor)(arr) + def setup(self, contructor, window, dtype, percentile): + N = 10**5 + arr = np.random.random(N).astype(dtype) + self.roll = getattr(pd, contructor)(arr).rolling(window) - def time_quantile(self, contructor, window, num_data, dtype, percentile): - self.data.rolling(window).quantile(0.5) + def time_quantile(self, contructor, window, dtype, percentile): + self.roll.quantile(percentile) From 5e1a79b6a4a085d72f2af33fb76433c1dc9cbcd2 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Dec 2017 20:52:45 -0800 Subject: [PATCH 3/3] Add current rolling to gil.py --- asv_bench/benchmarks/gil.py | 42 +++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 654e5d3bfec0e..7d63d78084270 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -180,19 +180,35 @@ def setup(self, method): raise NotImplementedError win = 100 arr = np.random.rand(100000) - rolling = {'rolling_median': rolling_median, - 'rolling_mean': rolling_mean, - 'rolling_min': rolling_min, - 'rolling_max': rolling_max, - 'rolling_var': rolling_var, - 'rolling_skew': rolling_skew, - 'rolling_kurt': rolling_kurt, - 'rolling_std': rolling_std} - - @test_parallel(num_threads=2) - def parallel_rolling(): - rolling[method](arr, win) - self.parallel_rolling = parallel_rolling + if hasattr(DataFrame, 'rolling'): + rolling = {'rolling_median': 'median', + 'rolling_mean': 'mean', + 'rolling_min': 'min', + 'rolling_max': 'max', + 'rolling_var': 'var', + 'rolling_skew': 'skew', + 'rolling_kurt': 'kurt', + 'rolling_std': 'std'} + df = DataFrame(arr).rolling(win) + + @test_parallel(num_threads=2) + def parallel_rolling(): + getattr(df, rolling[method])() + self.parallel_rolling = parallel_rolling + else: + rolling = {'rolling_median': rolling_median, + 'rolling_mean': rolling_mean, + 'rolling_min': rolling_min, + 'rolling_max': rolling_max, + 'rolling_var': rolling_var, + 'rolling_skew': rolling_skew, + 'rolling_kurt': rolling_kurt, + 'rolling_std': rolling_std} + + @test_parallel(num_threads=2) + def parallel_rolling(): + rolling[method](arr, win) + self.parallel_rolling = parallel_rolling def time_rolling(self, method): self.parallel_rolling()