Skip to content

Commit f6146a5

Browse files
authored
CLN: named parameters for GroupBy.(mean|median|var|std) (#31485)
1 parent 7880cf0 commit f6146a5

File tree

3 files changed

+29
-15
lines changed

3 files changed

+29
-15
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ Backwards incompatible API changes
6060
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6161
- :meth:`DataFrame.swaplevel` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`.
6262
Previously an ``AttributeError`` was raised (:issue:`31126`)
63+
- :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` (and similarly for :meth:`~DataFrameGroupBy.median`, :meth:`~DataFrameGroupBy.std` and :meth:`~DataFrameGroupBy.var`)
64+
now raise a ``TypeError`` if an unsupported keyword argument is passed.
65+
Previously an ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` was passed into :meth:`~DataFrameGroupBy.median`) (:issue:`31485`)
6366

6467

6568
.. ---------------------------------------------------------------------------

pandas/core/groupby/groupby.py

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,10 +1180,16 @@ def count(self):
11801180

11811181
@Substitution(name="groupby")
11821182
@Substitution(see_also=_common_see_also)
1183-
def mean(self, *args, **kwargs):
1183+
def mean(self, numeric_only: bool = True):
11841184
"""
11851185
Compute mean of groups, excluding missing values.
11861186
1187+
Parameters
1188+
----------
1189+
numeric_only : bool, default True
1190+
Include only float, int, boolean columns. If None, will attempt to use
1191+
everything, then use only numeric data.
1192+
11871193
Returns
11881194
-------
11891195
pandas.Series or pandas.DataFrame
@@ -1222,33 +1228,40 @@ def mean(self, *args, **kwargs):
12221228
2 4.0
12231229
Name: B, dtype: float64
12241230
"""
1225-
nv.validate_groupby_func("mean", args, kwargs, ["numeric_only"])
12261231
return self._cython_agg_general(
1227-
"mean", alt=lambda x, axis: Series(x).mean(**kwargs), **kwargs
1232+
"mean",
1233+
alt=lambda x, axis: Series(x).mean(numeric_only=numeric_only),
1234+
numeric_only=numeric_only,
12281235
)
12291236

12301237
@Substitution(name="groupby")
12311238
@Appender(_common_see_also)
1232-
def median(self, **kwargs):
1239+
def median(self, numeric_only=True):
12331240
"""
12341241
Compute median of groups, excluding missing values.
12351242
12361243
For multiple groupings, the result index will be a MultiIndex
12371244
1245+
Parameters
1246+
----------
1247+
numeric_only : bool, default True
1248+
Include only float, int, boolean columns. If None, will attempt to use
1249+
everything, then use only numeric data.
1250+
12381251
Returns
12391252
-------
12401253
Series or DataFrame
12411254
Median of values within each group.
12421255
"""
12431256
return self._cython_agg_general(
12441257
"median",
1245-
alt=lambda x, axis: Series(x).median(axis=axis, **kwargs),
1246-
**kwargs,
1258+
alt=lambda x, axis: Series(x).median(axis=axis, numeric_only=numeric_only),
1259+
numeric_only=numeric_only,
12471260
)
12481261

12491262
@Substitution(name="groupby")
12501263
@Appender(_common_see_also)
1251-
def std(self, ddof: int = 1, *args, **kwargs):
1264+
def std(self, ddof: int = 1):
12521265
"""
12531266
Compute standard deviation of groups, excluding missing values.
12541267
@@ -1266,12 +1279,11 @@ def std(self, ddof: int = 1, *args, **kwargs):
12661279
"""
12671280

12681281
# TODO: implement at Cython level?
1269-
nv.validate_groupby_func("std", args, kwargs)
1270-
return np.sqrt(self.var(ddof=ddof, **kwargs))
1282+
return np.sqrt(self.var(ddof=ddof))
12711283

12721284
@Substitution(name="groupby")
12731285
@Appender(_common_see_also)
1274-
def var(self, ddof: int = 1, *args, **kwargs):
1286+
def var(self, ddof: int = 1):
12751287
"""
12761288
Compute variance of groups, excluding missing values.
12771289
@@ -1287,15 +1299,14 @@ def var(self, ddof: int = 1, *args, **kwargs):
12871299
Series or DataFrame
12881300
Variance of values within each group.
12891301
"""
1290-
nv.validate_groupby_func("var", args, kwargs)
12911302
if ddof == 1:
12921303
return self._cython_agg_general(
1293-
"var", alt=lambda x, axis: Series(x).var(ddof=ddof, **kwargs), **kwargs
1304+
"var", alt=lambda x, axis: Series(x).var(ddof=ddof)
12941305
)
12951306
else:
1296-
f = lambda x: x.var(ddof=ddof, **kwargs)
1307+
func = lambda x: x.var(ddof=ddof)
12971308
with _group_selection_context(self):
1298-
return self._python_agg_general(f)
1309+
return self._python_agg_general(func)
12991310

13001311
@Substitution(name="groupby")
13011312
@Appender(_common_see_also)

pandas/tests/groupby/test_function.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ def test_nsmallest():
671671
tm.assert_series_equal(gb.nsmallest(3, keep="last"), e)
672672

673673

674-
@pytest.mark.parametrize("func", ["mean", "var", "std", "cumprod", "cumsum"])
674+
@pytest.mark.parametrize("func", ["cumprod", "cumsum"])
675675
def test_numpy_compat(func):
676676
# see gh-12811
677677
df = pd.DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]})

0 commit comments

Comments
 (0)