-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
DEPR: Change numeric_only to False in various groupby ops #49892
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e87c40c
8a507e5
611d543
dbcab6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2136,7 +2136,7 @@ def mean( | |
@final | ||
@Substitution(name="groupby") | ||
@Appender(_common_see_also) | ||
def median(self, numeric_only: bool | lib.NoDefault = lib.no_default): | ||
def median(self, numeric_only: bool = False): | ||
""" | ||
Compute median of groups, excluding missing values. | ||
|
||
|
@@ -2173,7 +2173,7 @@ def std( | |
ddof: int = 1, | ||
engine: str | None = None, | ||
engine_kwargs: dict[str, bool] | None = None, | ||
numeric_only: bool | lib.NoDefault = lib.no_default, | ||
numeric_only: bool = False, | ||
): | ||
""" | ||
Compute standard deviation of groups, excluding missing values. | ||
|
@@ -2202,11 +2202,15 @@ def std( | |
|
||
.. versionadded:: 1.4.0 | ||
|
||
numeric_only : bool, default True | ||
numeric_only : bool, default False | ||
Include only `float`, `int` or `boolean` data. | ||
|
||
.. versionadded:: 1.5.0 | ||
|
||
.. versionchanged:: 2.0.0 | ||
|
||
numeric_only now defaults to ``False``. | ||
|
||
Returns | ||
------- | ||
Series or DataFrame | ||
|
@@ -2236,7 +2240,6 @@ def std( | |
post_processing=lambda vals, inference: np.sqrt(vals), | ||
ddof=ddof, | ||
) | ||
self._maybe_warn_numeric_only_depr("std", result, numeric_only) | ||
return result | ||
|
||
@final | ||
|
@@ -2247,7 +2250,7 @@ def var( | |
ddof: int = 1, | ||
engine: str | None = None, | ||
engine_kwargs: dict[str, bool] | None = None, | ||
numeric_only: bool | lib.NoDefault = lib.no_default, | ||
numeric_only: bool = False, | ||
): | ||
""" | ||
Compute variance of groups, excluding missing values. | ||
|
@@ -2276,11 +2279,15 @@ def var( | |
|
||
.. versionadded:: 1.4.0 | ||
|
||
numeric_only : bool, default True | ||
numeric_only : bool, default False | ||
Include only `float`, `int` or `boolean` data. | ||
|
||
.. versionadded:: 1.5.0 | ||
|
||
.. versionchanged:: 2.0.0 | ||
|
||
numeric_only now defaults to ``False``. | ||
|
||
Returns | ||
------- | ||
Series or DataFrame | ||
|
@@ -2301,7 +2308,7 @@ def var( | |
@final | ||
@Substitution(name="groupby") | ||
@Appender(_common_see_also) | ||
def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default): | ||
def sem(self, ddof: int = 1, numeric_only: bool = False): | ||
""" | ||
Compute standard error of the mean of groups, excluding missing values. | ||
|
||
|
@@ -2317,23 +2324,22 @@ def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default | |
|
||
.. versionadded:: 1.5.0 | ||
|
||
.. versionchanged:: 2.0.0 | ||
|
||
numeric_only now defaults to ``False``. | ||
|
||
Returns | ||
------- | ||
Series or DataFrame | ||
Standard error of the mean of values within each group. | ||
""" | ||
# Resolve numeric_only so that std doesn't warn | ||
numeric_only_bool = self._resolve_numeric_only("sem", numeric_only, axis=0) | ||
if ( | ||
numeric_only_bool | ||
and self.obj.ndim == 1 | ||
and not is_numeric_dtype(self.obj.dtype) | ||
): | ||
if numeric_only and self.obj.ndim == 1 and not is_numeric_dtype(self.obj.dtype): | ||
raise TypeError( | ||
f"{type(self).__name__}.sem called with " | ||
f"numeric_only={numeric_only} and dtype {self.obj.dtype}" | ||
) | ||
result = self.std(ddof=ddof, numeric_only=numeric_only_bool) | ||
result = self.std(ddof=ddof, numeric_only=numeric_only) | ||
self._maybe_warn_numeric_only_depr("sem", result, numeric_only) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is `self._maybe_warn_numeric_only_depr` still needed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe not, but I have another branch built on this that removes it (and should be the final PR for nuisance deprecations). |
||
|
||
if result.ndim == 1: | ||
|
@@ -2411,10 +2417,8 @@ def sum( | |
return self._reindex_output(result, fill_value=0) | ||
|
||
@final | ||
@doc(_groupby_agg_method_template, fname="prod", no=True, mc=0) | ||
def prod( | ||
self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0 | ||
): | ||
@doc(_groupby_agg_method_template, fname="prod", no=False, mc=0) | ||
def prod(self, numeric_only: bool = False, min_count: int = 0): | ||
return self._agg_general( | ||
numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod | ||
) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -239,10 +239,7 @@ def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype | |
[[1, 2, 3, 4, 5, 6]] * 3, | ||
columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]), | ||
).astype({("a", "j"): dtype, ("b", "j"): dtype}) | ||
warn = FutureWarning if func == "std" else None | ||
msg = "The default value of numeric_only" | ||
with tm.assert_produces_warning(warn, match=msg): | ||
result = df.groupby(level=1, axis=1).agg(func) | ||
Comment on lines -242 to -245
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In 1.5.x we are currently warning in a few erroneous cases with axis=1. Is this worth fixing? I think the PR would have to go straight into 1.5.x because of the enforced deprecations. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would say if we haven't gotten an issue about this yet, I think it's okay not fixing for now. |
||
result = df.groupby(level=1, axis=1).agg(func) | ||
expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype( | ||
result_dtype_dict | ||
) | ||
|
@@ -266,10 +263,7 @@ def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict): | |
columns=Index([10, 20, 10, 20], name="x"), | ||
dtype="int64", | ||
).astype({10: "Int64"}) | ||
warn = FutureWarning if func == "std" else None | ||
msg = "The default value of numeric_only" | ||
with tm.assert_produces_warning(warn, match=msg): | ||
result = df.groupby("x", axis=1).agg(func) | ||
result = df.groupby("x", axis=1).agg(func) | ||
expected = DataFrame( | ||
data=expected_data, | ||
index=Index([0, 1, 0], name="y"), | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still some ops enforce numeric_only=False, mostly those that use `_op_via_apply`. Will be changing this to […] once all ops are done.