From f21c26e496613c3aafd91bea3e68ca47132dd53b Mon Sep 17 00:00:00 2001
From: Quang Nguyen
Date: Mon, 13 Nov 2023 21:56:58 +0700
Subject: [PATCH 1/5] asv FrameMixedDtypesOps

---
 asv_bench/benchmarks/stat_ops.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
index 1652fcf8d48da..d63f86b0800eb 100644
--- a/asv_bench/benchmarks/stat_ops.py
+++ b/asv_bench/benchmarks/stat_ops.py
@@ -20,6 +20,25 @@ def time_op(self, op, dtype, axis):
         self.df_func(axis=axis)
 
 
+class FrameMixedDtypesOps:
+    params = [ops, [0, 1, None]]
+    param_names = ["op", "axis"]
+
+    def setup(self, op, axis):
+        N = 1_000_000
+        df = pd.DataFrame(
+            {
+                "f": np.random.normal(0.0, 1.0, N),
+                "i": np.random.randint(0, N, N),
+                "ts": pd.date_range(start="1/1/2000", periods=N, freq="h").tolist(),
+            }
+        )
+        self.df_func = getattr(df, op)
+
+    def time_op(self, op, axis):
+        self.df_func(axis=axis)
+
+
 class FrameMultiIndexOps:
     params = [ops]
     param_names = ["op"]

From 0155cc23f0e49c72325ddefb034d75ab31e7774b Mon Sep 17 00:00:00 2001
From: Quang Nguyen
Date: Mon, 13 Nov 2023 23:13:12 +0700
Subject: [PATCH 2/5] fix

---
 asv_bench/benchmarks/stat_ops.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
index d63f86b0800eb..621c02ba3c247 100644
--- a/asv_bench/benchmarks/stat_ops.py
+++ b/asv_bench/benchmarks/stat_ops.py
@@ -33,6 +33,18 @@ def setup(self, op, axis):
                 "ts": pd.date_range(start="1/1/2000", periods=N, freq="h").tolist(),
             }
         )
+        if op in ("sum", "skew", "kurt", "prod", "sem", "var") or (
+            (op, axis)
+            in (
+                ("mean", 1),
+                ("mean", None),
+                ("median", 1),
+                ("median", None),
+                ("std", 1),
+            )
+        ):
+            del df["ts"]
+
         self.df_func = getattr(df, op)
 
     def time_op(self, op, axis):

From 973dfc540fe6d7178578d442e2684993e057f590 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quang=20Nguy=E1=BB=85n?= <30631476+quangngd@users.noreply.github.com>
Date: Mon, 13 Nov 2023 16:18:38 -0800
Subject: [PATCH 3/5] Update asv_bench/benchmarks/stat_ops.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 asv_bench/benchmarks/stat_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
index 621c02ba3c247..cd63d23d1efd3 100644
--- a/asv_bench/benchmarks/stat_ops.py
+++ b/asv_bench/benchmarks/stat_ops.py
@@ -30,7 +30,7 @@ def setup(self, op, axis):
             {
                 "f": np.random.normal(0.0, 1.0, N),
                 "i": np.random.randint(0, N, N),
-                "ts": pd.date_range(start="1/1/2000", periods=N, freq="h").tolist(),
+                "ts": pd.date_range(start="1/1/2000", periods=N, freq="h"),
             }
         )
         if op in ("sum", "skew", "kurt", "prod", "sem", "var") or (

From 8ff1930818170733f3f3aff74ab4710e72af2647 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quang=20Nguy=E1=BB=85n?= <30631476+quangngd@users.noreply.github.com>
Date: Mon, 13 Nov 2023 16:19:03 -0800
Subject: [PATCH 4/5] Update asv_bench/benchmarks/stat_ops.py

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
---
 asv_bench/benchmarks/stat_ops.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
index cd63d23d1efd3..7969bf55c53f1 100644
--- a/asv_bench/benchmarks/stat_ops.py
+++ b/asv_bench/benchmarks/stat_ops.py
@@ -43,7 +43,8 @@ def setup(self, op, axis):
                 ("std", 1),
             )
         ):
-            del df["ts"]
+            # Skipping cases where datetime aggregations are not implemented
+            raise NotImplementedError
 
         self.df_func = getattr(df, op)
 

From 371286d3a697b431f9fa47578bb90e29dd8b7e41 Mon Sep 17 00:00:00 2001
From: Quang Nguyen
Date: Tue, 14 Nov 2023 08:12:42 +0700
Subject: [PATCH 5/5] minor

---
 asv_bench/benchmarks/stat_ops.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
index 7969bf55c53f1..89bda81ccf08c 100644
--- a/asv_bench/benchmarks/stat_ops.py
+++ b/asv_bench/benchmarks/stat_ops.py
@@ -25,14 +25,6 @@ class FrameMixedDtypesOps:
     param_names = ["op", "axis"]
 
     def setup(self, op, axis):
-        N = 1_000_000
-        df = pd.DataFrame(
-            {
-                "f": np.random.normal(0.0, 1.0, N),
-                "i": np.random.randint(0, N, N),
-                "ts": pd.date_range(start="1/1/2000", periods=N, freq="h"),
-            }
-        )
         if op in ("sum", "skew", "kurt", "prod", "sem", "var") or (
             (op, axis)
             in (
@@ -46,6 +38,15 @@ def setup(self, op, axis):
             # Skipping cases where datetime aggregations are not implemented
             raise NotImplementedError
 
+        N = 1_000_000
+        df = pd.DataFrame(
+            {
+                "f": np.random.normal(0.0, 1.0, N),
+                "i": np.random.randint(0, N, N),
+                "ts": pd.date_range(start="1/1/2000", periods=N, freq="h"),
+            }
+        )
+
         self.df_func = getattr(df, op)
 
     def time_op(self, op, axis):