From 224779b49d3451752f4d16e4e53b4466f1d7b254 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Wed, 3 Mar 2021 09:51:53 +0100
Subject: [PATCH 1/2] PERF: avoid zeros_like in groupby.pyx

---
 pandas/_libs/groupby.pyx | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 43bf6d9dd1fee..522e09536375a 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -497,8 +497,9 @@ def _group_add(complexfloating_t[:, :] out,
         raise ValueError("len(index) != len(labels)")
 
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    sumx = np.zeros_like(out)
-    compensation = np.zeros_like(out)
+    # the below is equivalent to `np.zeros_like(out)` but faster
+    sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
+    compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
 
     N, K = (<object>values).shape
 
@@ -665,8 +666,9 @@ def _group_mean(floating[:, :] out,
         raise ValueError("len(index) != len(labels)")
 
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    sumx = np.zeros_like(out)
-    compensation = np.zeros_like(out)
+    # the below is equivalent to `np.zeros_like(out)` but faster
+    sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
+    compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
 
     N, K = (<object>values).shape
 

From 7800b70957f2ee348c0892f20f5ba5e0762fbf4c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Wed, 3 Mar 2021 10:00:59 +0100
Subject: [PATCH 2/2] add var/prod as well

---
 pandas/_libs/groupby.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 522e09536375a..40e82798c0753 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -556,7 +556,7 @@ def _group_prod(floating[:, :] out,
         raise ValueError("len(index) != len(labels)")
 
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    prodx = np.ones_like(out)
+    prodx = np.ones((<object>out).shape, dtype=(<object>out).base.dtype)
 
     N, K = (<object>values).shape
 
@@ -609,7 +609,7 @@ def _group_var(floating[:, :] out,
         raise ValueError("len(index) != len(labels)")
 
     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    mean = np.zeros_like(out)
+    mean = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
 
     N, K = (<object>values).shape
 