diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 9c043e8db9389..9eeaaf97d8ac6 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -210,7 +210,7 @@ def time_pivot_table_categorical_observed(self): ) def time_pivot_table_margins_only_column(self): - self.df.pivot_table(columns=["key2", "key3"], margins=True) + self.df.pivot_table(columns=["key1", "key2", "key3"], margins=True) class Crosstab: diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 20e99d007c798..85c49d2a4bafd 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -570,6 +570,7 @@ Removal of prior version deprecations/changes - Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`) - Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`) - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) +- Changed default of ``numeric_only`` to ``False`` in :meth:`.DataFrameGroupBy.sum` and :meth:`.DataFrameGroupBy.mean` (:issue:`46072`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b5e904a7d3882..768bd1038acaf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2046,7 +2046,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: @Substitution(see_also=_common_see_also) def mean( self, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, engine: str = "cython", engine_kwargs: dict[str, bool] | None = None, ): @@ -2055,12 +2055,12 @@ def mean( Parameters ---------- - numeric_only : bool, default True + numeric_only : bool, default False Include only float, int, boolean columns. .. versionchanged:: 2.0.0 - numeric_only no longer accepts ``None``. + numeric_only no longer accepts ``None`` and defaults to ``False``. engine : str, default None * ``'cython'`` : Runs the operation through C-extensions from cython. @@ -2117,7 +2117,6 @@ def mean( 2 4.0 Name: B, dtype: float64 """ - numeric_only_bool = self._resolve_numeric_only("mean", numeric_only, axis=0) if maybe_use_numba(engine): from pandas.core._numba.kernels import sliding_mean @@ -2126,7 +2125,7 @@ def mean( else: result = self._cython_agg_general( "mean", - alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool), + alt=lambda x: Series(x).mean(numeric_only=numeric_only), numeric_only=numeric_only, ) return result.__finalize__(self.obj, method="groupby") @@ -2379,10 +2378,10 @@ def size(self) -> DataFrame | Series: return self._reindex_output(result, fill_value=0) @final - @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0) + @doc(_groupby_agg_method_template, fname="sum", no=False, mc=0) def sum( self, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, min_count: int = 0, engine: str | None = None, engine_kwargs: dict[str, bool] | None = None, diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 711f1835446a5..1f46442ee13b0 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -1,6 +1,12 @@ import pytest -from pandas.core.dtypes.common import is_numeric_dtype +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_numeric_dtype, + is_object_dtype, + is_period_dtype, + is_string_dtype, +) import pandas as pd import pandas._testing as tm @@ -100,17 +106,19 @@ def test_in_numeric_groupby(self, data_for_grouping): ) dtype = data_for_grouping.dtype - if is_numeric_dtype(dtype) or dtype.name == "decimal": - warn = None - else: - warn = FutureWarning - msg = "The default value of numeric_only" - with tm.assert_produces_warning(warn, match=msg): - result = df.groupby("A").sum().columns - - if data_for_grouping.dtype._is_numeric: + if ( + is_numeric_dtype(dtype) + or is_bool_dtype(dtype) + or dtype.name == "decimal" + or is_string_dtype(dtype) + or is_period_dtype(dtype) + or is_object_dtype(dtype) + ): expected = pd.Index(["B", "C"]) + result = df.groupby("A").sum().columns else: expected = pd.Index(["C"]) - + with pytest.raises(TypeError, match="does not support"): + df.groupby("A").sum().columns + result = df.groupby("A").sum(numeric_only=True).columns tm.assert_index_equal(result, expected) diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index c72e8158a0725..d6e3298e83c3e 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -71,9 +71,7 @@ def test_metadata_propagation_indiv_groupby(self): "D": np.random.randn(8), } ) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A").sum() + result = df.groupby("A").sum() tm.assert_metadata_equivalent(df, result) def test_metadata_propagation_indiv_resample(self): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 6fa5d210b8d15..5c250618bf3c4 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -103,7 +103,10 @@ def test_basic(): # TODO: split this test gb = df.groupby("A", observed=False) exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) - result = gb.sum() + msg = "category type does not support sum operations" + with pytest.raises(TypeError, match=msg): + gb.sum() + result = gb.sum(numeric_only=True) tm.assert_frame_equal(result, expected) # GH 8623 @@ -338,7 +341,9 @@ def test_observed(observed): gb = df.groupby(["A", "B"], observed=observed) exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) - expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) + expected = DataFrame( + {"values": [1, 2, 3, 4], "C": ["foo", "bar", "foo", "bar"]}, index=exp_index + ) result = gb.sum() if not observed: expected = cartesian_product_for_groupers( diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index bc84a877cd75f..0a7080abd8700 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -166,9 +166,12 @@ def test_averages(self, df, method): ], ) - with pytest.raises(TypeError, match="[Cc]ould not convert"): - getattr(gb, method)(numeric_only=False) - result = getattr(gb, method)() + if method == "mean": + with pytest.raises(TypeError, match="[Cc]ould not convert"): + getattr(gb, method)() + result = getattr(gb, method)(numeric_only=True) + else: + result = getattr(gb, method)() tm.assert_frame_equal(result.reindex_like(expected), expected) expected_columns = expected.columns @@ -264,6 +267,15 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): ) with pytest.raises(exception, match=msg): getattr(gb, method)() + elif method in ("sum", "mean"): + msg = "|".join( + [ + "category type does not support sum operations", + "Could not convert", + ] + ) + with pytest.raises(exception, match=msg): + getattr(gb, method)() else: result = getattr(gb, method)() tm.assert_index_equal(result.columns, expected_columns_numeric) @@ -1375,7 +1387,7 @@ def test_groupby_sum_timedelta_with_nat(): ("idxmin", True, True), ("last", False, True), ("max", False, True), - ("mean", True, True), + ("mean", False, True), ("median", True, True), ("min", False, True), ("nth", False, False), @@ -1386,7 +1398,7 @@ def test_groupby_sum_timedelta_with_nat(): ("sem", True, True), ("skew", True, True), ("std", True, True), - ("sum", True, True), + ("sum", False, True), ("var", True, True), ], ) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 96be7a0cb785c..f8ee17fe80f27 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -435,16 +435,19 @@ def test_frame_set_name_single(df): grouped = df.groupby("A") msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.mean() + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() + result = grouped.mean(numeric_only=True) assert result.index.name == "A" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A", as_index=False).mean() + with pytest.raises(TypeError, match="Could not convert"): + df.groupby("A", as_index=False).mean() + result = df.groupby("A", as_index=False).mean(numeric_only=True) assert result.index.name != "A" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.agg(np.mean) + with pytest.raises(TypeError, match="Could not convert"): + grouped.agg(np.mean) + result = grouped[["C", "D"]].agg(np.mean) assert result.index.name == "A" result = grouped.agg({"C": np.mean, "D": np.std}) @@ -467,10 +470,10 @@ def test_multi_func(df): col2 = df["B"] grouped = df.groupby([col1.get, col2.get]) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - agged = grouped.mean() - expected = df.groupby(["A", "B"]).mean() + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() + agged = grouped.mean(numeric_only=True) + expected = df.groupby(["A", "B"]).mean() # TODO groupby get drops names tm.assert_frame_equal( @@ -665,16 +668,19 @@ def test_groupby_as_index_agg(df): # single-key - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.agg(np.mean) - expected = grouped.mean() + with pytest.raises(TypeError, match="Could not convert"): + grouped.agg(np.mean) + result = grouped[["C", "D"]].agg(np.mean) + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() + expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) result2 = grouped.agg({"C": np.mean, "D": np.sum}) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected2 = grouped.mean() - expected2["D"] = grouped.sum()["D"] + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() + expected2 = grouped.mean(numeric_only=True) + expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) grouped = df.groupby("A", as_index=True) @@ -755,10 +761,8 @@ def test_as_index_series_return_frame(df): grouped = df.groupby("A", as_index=False) grouped2 = df.groupby(["A", "B"], as_index=False) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped["C"].agg(np.sum) - expected = grouped.agg(np.sum).loc[:, ["A", "C"]] + result = grouped["C"].agg(np.sum) + expected = grouped.agg(np.sum).loc[:, ["A", "C"]] assert isinstance(result, DataFrame) tm.assert_frame_equal(result, expected) @@ -768,8 +772,7 @@ def test_as_index_series_return_frame(df): tm.assert_frame_equal(result2, expected2) result = grouped["C"].sum() - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = grouped.sum().loc[:, ["A", "C"]] + expected = grouped.sum().loc[:, ["A", "C"]] assert isinstance(result, DataFrame) tm.assert_frame_equal(result, expected) @@ -793,10 +796,12 @@ def test_groupby_as_index_cython(df): # single-key grouped = data.groupby("A", as_index=False) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.mean() - expected = data.groupby(["A"]).mean() + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() + result = grouped.mean(numeric_only=True) + with pytest.raises(TypeError, match="Could not convert"): + data.groupby(["A"]).mean() + expected = data.groupby(["A"]).mean(numeric_only=True) expected.insert(0, "A", expected.index) expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) @@ -863,21 +868,21 @@ def test_groupby_multi_corner(df): tm.assert_frame_equal(agged, expected) -def test_omit_nuisance(df): +def test_raises_on_nuisance(df): grouped = df.groupby("A") - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - agged = grouped.agg(np.mean) - exp = grouped.mean() - tm.assert_frame_equal(agged, exp) + with pytest.raises(TypeError, match="Could not convert"): + grouped.agg(np.mean) + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() df = df.loc[:, ["A", "C", "D"]] df["E"] = datetime.now() grouped = df.groupby("A") - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.agg(np.sum) - expected = grouped.sum() - tm.assert_frame_equal(result, expected) + msg = "datetime64 type does not support sum operations" + with pytest.raises(TypeError, match=msg): + grouped.agg(np.sum) + with pytest.raises(TypeError, match=msg): + grouped.sum() # won't work with axis = 1 grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1) @@ -964,14 +969,12 @@ def test_omit_nuisance_warnings(df): tm.assert_frame_equal(result, expected) -def test_omit_nuisance_python_multiple(three_group): +def test_raise_on_nuisance_python_multiple(three_group): grouped = three_group.groupby(["A", "B"]) - - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - agged = grouped.agg(np.mean) - exp = grouped.mean() - tm.assert_frame_equal(agged, exp) + with pytest.raises(TypeError, match="Could not convert"): + grouped.agg(np.mean) + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() def test_empty_groups_corner(mframe): @@ -987,10 +990,12 @@ def test_empty_groups_corner(mframe): ) grouped = df.groupby(["k1", "k2"]) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.agg(np.mean) - expected = grouped.mean() + with pytest.raises(TypeError, match="Could not convert"): + grouped.agg(np.mean) + result = grouped[["v1", "v2"]].agg(np.mean) + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() + expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) grouped = mframe[3:5].groupby(level=0) @@ -1012,9 +1017,9 @@ def test_wrap_aggregated_output_multindex(mframe): df["baz", "two"] = "peekaboo" keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - agged = df.groupby(keys).agg(np.mean) + with pytest.raises(TypeError, match="Could not convert"): + df.groupby(keys).agg(np.mean) + agged = df.drop(columns=("baz", "two")).groupby(keys).agg(np.mean) assert isinstance(agged.columns, MultiIndex) def aggfun(ser): @@ -1170,22 +1175,17 @@ def test_groupby_with_hier_columns(): # add a nuisance column sorted_columns, _ = columns.sortlevel(0) df["A", "foo"] = "bar" - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(level=0).mean() + with pytest.raises(TypeError, match="Could not convert"): + df.groupby(level=0).mean() + result = df.groupby(level=0).mean(numeric_only=True) tm.assert_index_equal(result.columns, df.columns[:-1]) def test_grouping_ndarray(df): grouped = df.groupby(df["A"].values) - - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.sum() - expected = df.groupby("A").sum() - tm.assert_frame_equal( - result, expected, check_names=False - ) # Note: no names when grouping by value + result = grouped.sum() + expected = df.groupby(df["A"].rename(None)).sum() + tm.assert_frame_equal(result, expected) def test_groupby_wrong_multi_labels(): @@ -1210,10 +1210,12 @@ def test_groupby_wrong_multi_labels(): def test_groupby_series_with_name(df): - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(df["A"]).mean() - result2 = df.groupby(df["A"], as_index=False).mean() + with pytest.raises(TypeError, match="Could not convert"): + df.groupby(df["A"]).mean() + result = df.groupby(df["A"]).mean(numeric_only=True) + with pytest.raises(TypeError, match="Could not convert"): + df.groupby(df["A"], as_index=False).mean() + result2 = df.groupby(df["A"], as_index=False).mean(numeric_only=True) assert result.index.name == "A" assert "A" in result2 @@ -1364,10 +1366,12 @@ def test_groupby_unit64_float_conversion(): def test_groupby_list_infer_array_like(df): - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(list(df["A"])).mean() - expected = df.groupby(df["A"]).mean() + with pytest.raises(TypeError, match="Could not convert"): + df.groupby(list(df["A"])).mean() + result = df.groupby(list(df["A"])).mean(numeric_only=True) + with pytest.raises(TypeError, match="Could not convert"): + df.groupby(df["A"]).mean() + expected = df.groupby(df["A"]).mean(numeric_only=True) tm.assert_frame_equal(result, expected, check_names=False) with pytest.raises(KeyError, match=r"^'foo'$"): @@ -1480,9 +1484,9 @@ def test_groupby_2d_malformed(): d["zeros"] = [0, 0] d["ones"] = [1, 1] d["label"] = ["l1", "l2"] - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - tmp = d.groupby(["group"]).mean() + with pytest.raises(TypeError, match="Could not convert"): + d.groupby(["group"]).mean() + tmp = d.groupby(["group"]).mean(numeric_only=True) res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) @@ -1648,13 +1652,10 @@ def f(group): def test_no_dummy_key_names(df): # see gh-1291 - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(df["A"].values).sum() + result = df.groupby(df["A"].values).sum() assert result.index.name is None - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby([df["A"].values, df["B"].values]).sum() + result = df.groupby([df["A"].values, df["B"].values]).sum() assert result.index.names == (None, None) @@ -1972,11 +1973,11 @@ def test_empty_groupby(columns, keys, values, method, op, request, using_array_m gb = df.groupby(keys, group_keys=False)[columns] - def get_result(): + def get_result(**kwargs): if method == "attr": - return getattr(gb, op)() + return getattr(gb, op)(**kwargs) else: - return getattr(gb, method)(op) + return getattr(gb, method)(op, **kwargs) if columns == "C": # i.e. SeriesGroupBy @@ -2011,7 +2012,14 @@ def get_result(): if df.dtypes[0].kind == "M": # GH#41291 # datetime64 -> prod and sum are invalid - result = get_result() + if op == "sum": + with pytest.raises( + TypeError, match="datetime64 type does not support" + ): + get_result() + result = get_result(numeric_only=True) + else: + result = get_result() # with numeric_only=True, these are dropped, and we get # an empty DataFrame back @@ -2022,7 +2030,14 @@ def get_result(): elif isinstance(values, Categorical): # GH#41291 # Categorical doesn't implement sum or prod - result = get_result() + if op == "sum": + with pytest.raises( + TypeError, match="category type does not support" + ): + get_result() + result = get_result(numeric_only=True) + else: + result = get_result() # with numeric_only=True, these are dropped, and we get # an empty DataFrame back @@ -2042,7 +2057,10 @@ def get_result(): result = get_result() # In this case we have list-of-list, will raise TypeError, # and subsequently be dropped as nuisance columns - expected = df.set_index(keys)[[]] + if op == "sum": + expected = df.set_index(keys)[["C"]] + else: + expected = df.set_index(keys)[[]] tm.assert_equal(result, expected) return @@ -2660,14 +2678,12 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): ) expected = DataFrame( - {"z": [7, 8]}, + {"y": [Timedelta(7, "days"), Timedelta(8, "days")], "z": [7, 8]}, index=Index([0, 1], dtype="int64", name="x"), ) gb = df.groupby(by=["x"]) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.sum() + result = gb.sum() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index c947ff03fd09d..2cc4d376c6abe 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -59,10 +59,12 @@ def test_column_select_via_attr(self, df): tm.assert_series_equal(result, expected) df["mean"] = 1.5 - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A").mean() - expected = df.groupby("A").agg(np.mean) + with pytest.raises(TypeError, match="Could not convert"): + df.groupby("A").mean() + result = df.groupby("A").mean(numeric_only=True) + with pytest.raises(TypeError, match="Could not convert"): + df.groupby("A").agg(np.mean) + expected = df.groupby("A")[["C", "D", "mean"]].agg(np.mean) tm.assert_frame_equal(result, expected) def test_getitem_list_of_columns(self): @@ -288,30 +290,53 @@ def test_grouper_column_and_index(self): {"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]}, index=idx, ) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean() - expected = df_multi.reset_index().groupby(["B", "inner"]).mean() + result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean( + numeric_only=True + ) + with pytest.raises(TypeError, match="Could not convert"): + df_multi.reset_index().groupby(["B", "inner"]).mean() + expected = ( + df_multi.reset_index().groupby(["B", "inner"]).mean(numeric_only=True) + ) tm.assert_frame_equal(result, expected) # Test the reverse grouping order - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() - expected = df_multi.reset_index().groupby(["inner", "B"]).mean() + result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean( + numeric_only=True + ) + with pytest.raises(TypeError, match="Could not convert"): + df_multi.reset_index().groupby(["inner", "B"]).mean() + expected = ( + df_multi.reset_index().groupby(["inner", "B"]).mean(numeric_only=True) + ) tm.assert_frame_equal(result, expected) # Grouping a single-index frame by a column and the index should # be equivalent to resetting the index and grouping by two columns df_single = df_multi.reset_index("outer") - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() - expected = df_single.reset_index().groupby(["B", "inner"]).mean() + with pytest.raises(TypeError, match="Could not convert"): + df_single.groupby(["B", pd.Grouper(level="inner")]).mean() + result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean( + numeric_only=True + ) + with pytest.raises(TypeError, match="Could not convert"): + df_single.reset_index().groupby(["B", "inner"]).mean() + expected = ( + df_single.reset_index().groupby(["B", "inner"]).mean(numeric_only=True) + ) tm.assert_frame_equal(result, expected) # Test the reverse grouping order - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() - expected = df_single.reset_index().groupby(["inner", "B"]).mean() + with pytest.raises(TypeError, match="Could not convert"): + df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() + result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean( + numeric_only=True + ) + with pytest.raises(TypeError, match="Could not convert"): + df_single.reset_index().groupby(["inner", "B"]).mean() + expected = ( + df_single.reset_index().groupby(["inner", "B"]).mean(numeric_only=True) + ) tm.assert_frame_equal(result, expected) def test_groupby_levels_and_columns(self): @@ -385,10 +410,12 @@ def test_empty_groups(self, df): def test_groupby_grouper(self, df): grouped = df.groupby("A") - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(grouped.grouper).mean() - expected = grouped.mean() + with pytest.raises(TypeError, match="Could not convert"): + df.groupby(grouped.grouper).mean() + result = df.groupby(grouped.grouper).mean(numeric_only=True) + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() + expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) def test_groupby_dict_mapping(self): diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index 501a21981a148..d3d34dfd6f90a 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -47,9 +47,15 @@ def series(): ], ) def test_grouper_index_level_as_string(frame, key_strs, groupers): - warn = FutureWarning if "B" not in key_strs or "outer" in frame.columns else None - msg = "The default value of numeric_only" - with tm.assert_produces_warning(warn, match=msg): + if "B" not in key_strs or "outer" in frame.columns: + with pytest.raises(TypeError, match="Could not convert"): + frame.groupby(key_strs).mean() + result = frame.groupby(key_strs).mean(numeric_only=True) + + with pytest.raises(TypeError, match="Could not convert"): + frame.groupby(groupers).mean() + expected = frame.groupby(groupers).mean(numeric_only=True) + else: result = frame.groupby(key_strs).mean() expected = frame.groupby(groupers).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 4f58bcb5ee763..7e9e0abf55b71 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -55,14 +55,11 @@ def test_pipe_args(): ) def f(dfgb, arg1): - return dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False).groupby( - dfgb.grouper - ) + filtered = dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False) + return filtered.groupby("group") def g(dfgb, arg2): - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - return dfgb.sum() / dfgb.sum().sum() + arg2 + return dfgb.sum() / dfgb.sum().sum() + arg2 def h(df, arg3): return df.x + df.y - arg3 @@ -70,10 +67,10 @@ def h(df, arg3): result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100) # Assert the results here - index = Index(["A", "B", "C"], name="group") - expected = pd.Series([-79.5160891089, -78.4839108911, -80], index=index) + index = Index(["A", "B"], name="group") + expected = pd.Series([-79.5160891089, -78.4839108911], index=index) - tm.assert_series_equal(expected, result) + tm.assert_series_equal(result, expected) # test SeriesGroupby.pipe ser = pd.Series([1, 1, 2, 2, 3, 3]) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index a71719f3da4f7..d5b4d9ae44dab 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -98,25 +98,26 @@ def test_groupby_with_timegrouper(self): df = df.set_index(["Date"]) expected = DataFrame( - {"Quantity": 0}, + {"Buyer": 0, "Quantity": 0}, index=date_range( "20130901", "20131205", freq="5D", name="Date", inclusive="left" ), ) - expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") + expected.iloc[0, 0] = "CarlCarlCarl" + expected.iloc[6, 0] = "CarlCarl" + expected.iloc[18, 0] = "Joe" + expected.iloc[[0, 6, 18], 1] = np.array([24, 6, 9], dtype="int64") msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result1 = df.resample("5D").sum() - tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result1, expected[["Quantity"]]) df_sorted = df.sort_index() - with tm.assert_produces_warning(FutureWarning, match=msg): - result2 = df_sorted.groupby(Grouper(freq="5D")).sum() + result2 = df_sorted.groupby(Grouper(freq="5D")).sum() tm.assert_frame_equal(result2, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - result3 = df.groupby(Grouper(freq="5D")).sum() + result3 = df.groupby(Grouper(freq="5D")).sum() tm.assert_frame_equal(result3, expected) @pytest.mark.parametrize("should_sort", [True, False]) @@ -191,8 +192,7 @@ def test_timegrouper_with_reg_groups(self): ).set_index(["Date", "Buyer"]) msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby([Grouper(freq="A"), "Buyer"]).sum() + result = df.groupby([Grouper(freq="A"), "Buyer"]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) expected = DataFrame( @@ -207,8 +207,7 @@ def test_timegrouper_with_reg_groups(self): ], } ).set_index(["Date", "Buyer"]) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum() + result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) df_original = DataFrame( @@ -246,13 +245,10 @@ def test_timegrouper_with_reg_groups(self): } ).set_index(["Date", "Buyer"]) - warn_msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum() + result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum() + result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum(numeric_only=True) expected = DataFrame( { "Buyer": "Carl Joe Mark".split(), @@ -268,8 +264,9 @@ def test_timegrouper_with_reg_groups(self): # passing the name df = df.reset_index() - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum( + numeric_only=True + ) tm.assert_frame_equal(result, expected) with pytest.raises(KeyError, match="'The grouper name foo is not found'"): @@ -277,11 +274,13 @@ def test_timegrouper_with_reg_groups(self): # passing the level df = df.set_index("Date") - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum() + result = df.groupby([Grouper(freq="1M", level="Date"), "Buyer"]).sum( + numeric_only=True + ) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum() + result = df.groupby([Grouper(freq="1M", level=0), "Buyer"]).sum( + numeric_only=True + ) tm.assert_frame_equal(result, expected) with pytest.raises(ValueError, match="The level foo is not valid"): @@ -290,8 +289,9 @@ def test_timegrouper_with_reg_groups(self): # multi names df = df.copy() df["Date"] = df.index + offsets.MonthEnd(2) - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() + result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum( + numeric_only=True + ) expected = DataFrame( { "Buyer": "Carl Joe Mark".split(), @@ -320,22 +320,18 @@ def test_timegrouper_with_reg_groups(self): [datetime(2013, 10, 31, 0, 0)], freq=offsets.MonthEnd(), name="Date" ), ) - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby(Grouper(freq="1M")).sum() + result = df.groupby(Grouper(freq="1M")).sum(numeric_only=True) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby([Grouper(freq="1M")]).sum() + result = df.groupby([Grouper(freq="1M")]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) expected.index = expected.index.shift(1) assert expected.index.freq == offsets.MonthEnd() - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby(Grouper(freq="1M", key="Date")).sum() + result = df.groupby(Grouper(freq="1M", key="Date")).sum(numeric_only=True) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = df.groupby([Grouper(freq="1M", key="Date")]).sum() + result = df.groupby([Grouper(freq="1M", key="Date")]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"]) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index ad8051792266e..001b1d7bec8eb 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -440,10 +440,12 @@ def test_transform_exclude_nuisance(df): def test_transform_function_aliases(df): - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A").transform("mean") - expected = df.groupby("A").transform(np.mean) + with pytest.raises(TypeError, match="Could not convert"): + df.groupby("A").transform("mean") + result = df.groupby("A").transform("mean", numeric_only=True) + with pytest.raises(TypeError, match="Could not convert"): + df.groupby("A").transform(np.mean) + expected = df.groupby("A")[["C", "D"]].transform(np.mean) tm.assert_frame_equal(result, expected) result = df.groupby("A")["C"].transform("mean") diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index dd2c59ec161e7..4b32022e177e8 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -569,9 +569,9 @@ def test_mixed_type_join_with_suffix(self): df.insert(5, "dt", "foo") grouped = df.groupby("id") - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - mn = grouped.mean() + with pytest.raises(TypeError, match="Could not convert"): + grouped.mean() + mn = grouped.mean(numeric_only=True) cn = grouped.count() # it works! diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f9119ea43160b..9c1a07dd3cde4 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -147,10 +147,8 @@ def test_pivot_table_nocols(self): df = DataFrame( {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]} ) - msg = "pivot_table dropped a column because it failed to aggregate" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = df.pivot_table(columns="cols", aggfunc=np.sum) - xp = df.pivot_table(index="cols", aggfunc=np.sum).T + rs = df.pivot_table(columns="cols", aggfunc=np.sum) + xp = df.pivot_table(index="cols", aggfunc=np.sum).T tm.assert_frame_equal(rs, xp) rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"}) @@ -911,15 +909,20 @@ def test_no_col(self, data): # to help with a buglet data.columns = [k * 2 for k in data.columns] - msg = "pivot_table dropped a column because it failed to aggregate" - with tm.assert_produces_warning(FutureWarning, match=msg): - table = data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean) + with pytest.raises(TypeError, match="Could not convert"): + data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean) + table = data.drop(columns="CC").pivot_table( + index=["AA", "BB"], margins=True, aggfunc=np.mean + ) for value_col in table.columns: totals = table.loc[("All", ""), value_col] assert totals == data[value_col].mean() - with tm.assert_produces_warning(FutureWarning, match=msg): - table = data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") + with pytest.raises(TypeError, match="Could not convert"): + data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") + table = data.drop(columns="CC").pivot_table( + index=["AA", "BB"], margins=True, aggfunc="mean" + ) for item in ["DD", "EE", "FF"]: totals = table.loc[("All", ""), item] assert totals == data[item].mean() @@ -936,7 +939,10 @@ def test_no_col(self, data): ( ["A", "B"], "sum", - [[9, 13, 22, 5, 6, 11], [14, 18, 32, 11, 11, 22]], + [ + [9, 13, 22, 5, 6, 11], + [14, 18, 32, 11, 11, 22], + ], MultiIndex.from_tuples( [ ("bar", "one"), @@ -974,10 +980,14 @@ def test_margin_with_only_columns_defined( "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], } ) - - msg = "pivot_table dropped a column because it failed to aggregate" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) + if aggfunc != "sum": + with pytest.raises(TypeError, match="Could not convert"): + df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc) + if "B" not in columns: + df = df.drop(columns="B") + result = df.drop(columns="C").pivot_table( + columns=columns, margins=True, aggfunc=aggfunc + ) expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns) tm.assert_frame_equal(result, expected) @@ -2003,11 +2013,9 @@ def test_pivot_string_as_func(self): def test_pivot_string_func_vs_func(self, f, f_numpy, data): # GH #18713 # for consistency purposes - - msg = "pivot_table dropped a column because it failed to aggregate" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pivot_table(data, index="A", columns="B", aggfunc=f) - expected = pivot_table(data, index="A", columns="B", aggfunc=f_numpy) + data = data.drop(columns="C") + result = pivot_table(data, index="A", columns="B", aggfunc=f) + expected = pivot_table(data, index="A", columns="B", aggfunc=f_numpy) tm.assert_frame_equal(result, expected) @pytest.mark.slow