From cb3a0aeae9e9d9fb7f0f31c7206c04bce0594904 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 16 Feb 2021 20:45:42 +0100 Subject: [PATCH 1/3] ASV: add frame ops benchmarks for varying n_rows/n_columns ratios --- asv_bench/benchmarks/arithmetic.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 7478efbf22609..4cb24469bdb31 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -110,16 +110,22 @@ class FrameWithFrameWide: operator.add, operator.floordiv, operator.gt, - ] + ], + [ + # (n_rows, n_columns) + (1_000_000, 10), + (100_000, 100), + (10_000, 1000), + (1000, 10_000), + ], ] - param_names = ["op"] + param_names = ["op", "shape"] - def setup(self, op): + def setup(self, op, shape): # we choose dtypes so as to make the blocks # a) not perfectly match between right and left # b) appreciably bigger than single columns - n_cols = 2000 - n_rows = 500 + n_rows, n_cols = shape # construct dataframe with 2 blocks arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8") @@ -131,7 +137,7 @@ def setup(self, op): df._consolidate_inplace() # TODO: GH#33198 the setting here shoudlnt need two steps - arr1 = np.random.randn(n_rows, n_cols // 4).astype("f8") + arr1 = np.random.randn(n_rows, max(n_cols // 4, 3)).astype("f8") arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8") arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8") df2 = pd.concat( @@ -145,11 +151,11 @@ def setup(self, op): self.left = df self.right = df2 - def time_op_different_blocks(self, op): + def time_op_different_blocks(self, op, shape): # blocks (and dtypes) are not aligned op(self.left, self.right) - def time_op_same_blocks(self, op): + def time_op_same_blocks(self, op, shape): # blocks (and dtypes) are aligned op(self.left, self.left) From 5eb1933a3b7dfd05845ada35a8f102ea9e7d0e7c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 17 Feb 2021 09:19:58 +0100 Subject: [PATCH 2/3] limit size for floordiv --- asv_bench/benchmarks/arithmetic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 4cb24469bdb31..d19c2ceb5b4f4 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -127,6 +127,10 @@ def setup(self, op, shape): # b) appreciably bigger than single columns n_rows, n_cols = shape + if op is operator.floordiv: + # floordiv is much slower as the other operations -> use less data + n_rows = n_rows // 10 + # construct dataframe with 2 blocks arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8") arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4") From de36dfd226ce6679ad5e62664f66efdd69adfb3c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 17 Feb 2021 17:35:45 +0100 Subject: [PATCH 3/3] Update asv_bench/benchmarks/arithmetic.py --- asv_bench/benchmarks/arithmetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index d19c2ceb5b4f4..ff049e61d02cf 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -128,7 +128,7 @@ def setup(self, op, shape): n_rows, n_cols = shape if op is operator.floordiv: - # floordiv is much slower as the other operations -> use less data + # floordiv is much slower than the other operations -> use less data n_rows = n_rows // 10 # construct dataframe with 2 blocks