From 54bf3aa62248db61e3d385b31c99e78600d3706c Mon Sep 17 00:00:00 2001 From: maushumee Date: Mon, 5 Aug 2024 10:17:24 -0400 Subject: [PATCH 1/6] Add numeric_only to func signature --- pandas/core/groupby/groupby.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c07bc56377151..204eff32125c1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4681,12 +4681,14 @@ def rank( @final @Substitution(name="groupby") @Substitution(see_also=_common_see_also) - def cumprod(self, *args, **kwargs) -> NDFrameT: + def cumprod(self, numeric_only: bool = False, *args, **kwargs) -> NDFrameT: """ Cumulative product for each group. Parameters ---------- + numeric_only : bool + Optional argument with default as False. *args : tuple Positional arguments to be passed to `func`. **kwargs : dict @@ -4734,18 +4736,20 @@ def cumprod(self, *args, **kwargs) -> NDFrameT: horse 16 10 bull 6 9 """ - nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"]) - return self._cython_transform("cumprod", **kwargs) + nv.validate_groupby_func("cumprod", args, kwargs, ["skipna"]) + return self._cython_transform("cumprod", numeric_only, **kwargs) @final @Substitution(name="groupby") @Substitution(see_also=_common_see_also) - def cumsum(self, *args, **kwargs) -> NDFrameT: + def cumsum(self, numeric_only: bool = False, *args, **kwargs) -> NDFrameT: """ Cumulative sum for each group. Parameters ---------- + numeric_only : bool + Optional argument with default as False. *args : tuple Positional arguments to be passed to `func`. 
**kwargs : dict @@ -4793,8 +4797,8 @@ def cumsum(self, *args, **kwargs) -> NDFrameT: gorilla 10 7 lion 6 9 """ - nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"]) - return self._cython_transform("cumsum", **kwargs) + nv.validate_groupby_func("cumsum", args, kwargs, ["skipna"]) + return self._cython_transform("cumsum", numeric_only, **kwargs) @final @Substitution(name="groupby") From e6a103a9050adf0fc4c260f4cab7915c89f5e0a8 Mon Sep 17 00:00:00 2001 From: maushumee Date: Mon, 5 Aug 2024 10:20:32 -0400 Subject: [PATCH 2/6] Add numeric_only to cumprod and cumsum func signature --- pandas/core/groupby/groupby.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 204eff32125c1..326028ea38489 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4687,8 +4687,6 @@ def cumprod(self, numeric_only: bool = False, *args, **kwargs) -> NDFrameT: Parameters ---------- - numeric_only : bool - Optional argument with default as False. *args : tuple Positional arguments to be passed to `func`. **kwargs : dict @@ -4748,8 +4746,6 @@ def cumsum(self, numeric_only: bool = False, *args, **kwargs) -> NDFrameT: Parameters ---------- - numeric_only : bool - Optional argument with default as False. *args : tuple Positional arguments to be passed to `func`. 
**kwargs : dict From b0437c3f09e50c0db4860401bdde2ce38cf56b7d Mon Sep 17 00:00:00 2001 From: maushumee Date: Mon, 5 Aug 2024 10:33:51 -0400 Subject: [PATCH 3/6] Added docstring --- pandas/core/groupby/groupby.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 326028ea38489..204eff32125c1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4687,6 +4687,8 @@ def cumprod(self, numeric_only: bool = False, *args, **kwargs) -> NDFrameT: Parameters ---------- + numeric_only : bool + Optional argument with default as False. *args : tuple Positional arguments to be passed to `func`. **kwargs : dict @@ -4746,6 +4748,8 @@ def cumsum(self, numeric_only: bool = False, *args, **kwargs) -> NDFrameT: Parameters ---------- + numeric_only : bool + Optional argument with default as False. *args : tuple Positional arguments to be passed to `func`. **kwargs : dict From 75f0b15f595b2833f3ef5c0e74f7c0580413c690 Mon Sep 17 00:00:00 2001 From: maushumee Date: Tue, 6 Aug 2024 10:23:24 -0400 Subject: [PATCH 4/6] Fix tests and add documentation --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/tests/groupby/test_api.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 846d863910b4c..32c98fbf9d655 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -610,6 +610,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`) - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. 
(:issue:`57775`) - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`) +- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where the ``numeric_only`` parameter was only accepted indirectly through ``**kwargs`` instead of being an explicit argument in the method signature. (:issue:`58811`) - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`) - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`) - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`) diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index 33b39bad4ab81..013b308cd14cd 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -185,7 +185,7 @@ def test_frame_consistency(groupby_func): elif groupby_func in ("cummax", "cummin"): exclude_expected = {"axis", "skipna", "args"} elif groupby_func in ("cumprod", "cumsum"): - exclude_expected = {"axis", "skipna", "numeric_only"} + exclude_expected = {"axis", "skipna"} elif groupby_func in ("pct_change",): exclude_expected = {"kwargs"} elif groupby_func in ("rank",): @@ -245,6 +245,7 @@ def test_series_consistency(request, groupby_func): exclude_result = {"numeric_only"} elif groupby_func in ("cumprod", "cumsum"): exclude_expected = {"skipna"} + exclude_result = {"numeric_only"} elif groupby_func in ("pct_change",): exclude_expected = {"kwargs"} elif groupby_func in ("rank",): From ca1c4f53e0fecad3ff680018253a09e52bdcaf42 Mon Sep 17 00:00:00 2001 From: Maushumee Date: Tue, 6 Aug 2024 16:06:56 -0400 Subject: [PATCH 5/6] Update pandas/core/groupby/groupby.py Co-authored-by: Matthew Roeschke 
<10647082+mroeschke@users.noreply.github.com> --- pandas/core/groupby/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 204eff32125c1..97b849aa9463b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4687,8 +4687,8 @@ def cumprod(self, numeric_only: bool = False, *args, **kwargs) -> NDFrameT: Parameters ---------- - numeric_only : bool - Optional argument with default as False. + numeric_only : bool, default False + Include only float, int, boolean columns. *args : tuple Positional arguments to be passed to `func`. **kwargs : dict From 14bc4dcf49be1646ad1a8036dff21b6707bc29b0 Mon Sep 17 00:00:00 2001 From: Maushumee Date: Tue, 6 Aug 2024 16:07:09 -0400 Subject: [PATCH 6/6] Update pandas/core/groupby/groupby.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/core/groupby/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 97b849aa9463b..235765e78c5f1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4748,8 +4748,8 @@ def cumsum(self, numeric_only: bool = False, *args, **kwargs) -> NDFrameT: Parameters ---------- - numeric_only : bool - Optional argument with default as False. + numeric_only : bool, default False + Include only float, int, boolean columns. *args : tuple Positional arguments to be passed to `func`. **kwargs : dict