diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index b0aafbc22562e..0636d2edcd0fe 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1240,18 +1240,8 @@ a common dtype will be determined in the same way as ``DataFrame`` construction. Control grouped column(s) placement with ``group_keys`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionchanged:: 1.5.0 - - If ``group_keys=True`` is specified when calling :meth:`~DataFrame.groupby`, - functions passed to ``apply`` that return like-indexed outputs will have the - group keys added to the result index. Previous versions of pandas would add - the group keys only when the result from the applied function had a different - index than the input. If ``group_keys`` is not specified, the group keys will - not be added for like-indexed outputs. In the future this behavior - will change to always respect ``group_keys``, which defaults to ``True``. - To control whether the grouped column(s) are included in the indices, you can use -the argument ``group_keys``. Compare +the argument ``group_keys`` which defaults to ``True``. Compare .. 
ipython:: python diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 767e40e377aed..ccc6949e6f2e0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10989,7 +10989,7 @@ def resample( level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, - group_keys: bool | lib.NoDefault = no_default, + group_keys: bool = False, ) -> Resampler: return super().resample( rule=rule, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 497bacfa24812..586d7d653599b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8509,7 +8509,7 @@ def resample( level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, - group_keys: bool_t | lib.NoDefault = lib.no_default, + group_keys: bool_t = False, ) -> Resampler: """ Resample time-series data. @@ -8570,17 +8570,20 @@ def resample( .. versionadded:: 1.1.0 - group_keys : bool, optional + group_keys : bool, default False Whether to include the group keys in the result index when using - ``.apply()`` on the resampled object. Not specifying ``group_keys`` - will retain values-dependent behavior from pandas 1.4 - and earlier (see :ref:`pandas 1.5.0 Release notes - <whatsnew_150.enhancements.resample_group_keys>` - for examples). In a future version of pandas, the behavior will - default to the same as specifying ``group_keys=False``. + ``.apply()`` on the resampled object. .. versionadded:: 1.5.0 + Not specifying ``group_keys`` will retain values-dependent behavior + from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes + <whatsnew_150.enhancements.resample_group_keys>` for examples). + + .. versionchanged:: 2.0.0 + + ``group_keys`` now defaults to ``False``. 
+ Returns ------- pandas.core.Resampler diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index dee68c01587b1..c676999cf128d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -627,7 +627,8 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): axis: AxisInt grouper: ops.BaseGrouper keys: _KeysArgType | None = None - group_keys: bool | lib.NoDefault + level: IndexLabel | None = None + group_keys: bool @final def __len__(self) -> int: @@ -905,7 +906,7 @@ def __init__( selection: IndexLabel | None = None, as_index: bool = True, sort: bool = True, - group_keys: bool | lib.NoDefault = True, + group_keys: bool = True, observed: bool = False, dropna: bool = True, ) -> None: @@ -4240,7 +4241,7 @@ def get_groupby( by: _KeysArgType | None = None, axis: AxisInt = 0, grouper: ops.BaseGrouper | None = None, - group_keys: bool | lib.NoDefault = True, + group_keys: bool = True, ) -> GroupBy: klass: type[GroupBy] if isinstance(obj, Series): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f23256c64db2d..9ab56563135c6 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -152,7 +152,7 @@ def __init__( kind=None, *, gpr_index: Index, - group_keys: bool | lib.NoDefault = lib.no_default, + group_keys: bool = False, selection=None, ) -> None: self._timegrouper = timegrouper @@ -1584,7 +1584,7 @@ def __init__( origin: Literal["epoch", "start", "start_day", "end", "end_day"] | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, - group_keys: bool | lib.NoDefault = True, + group_keys: bool = False, **kwargs, ) -> None: # Check for correctness of the keyword arguments which would diff --git a/pandas/core/series.py b/pandas/core/series.py index e8564f534c166..1725f754ce065 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -35,10 +35,7 @@ reshape, ) from pandas._libs.internals import BlockValuesRefs -from 
pandas._libs.lib import ( - is_range_indexer, - no_default, -) +from pandas._libs.lib import is_range_indexer from pandas._typing import ( AggFuncType, AlignJoin, @@ -5701,7 +5698,7 @@ def resample( level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, - group_keys: bool | lib.NoDefault = no_default, + group_keys: bool = False, ) -> Resampler: return super().resample( rule=rule, diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 184b77c880238..ddd59a55b546d 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -125,21 +125,24 @@ Specifying ``sort=False`` with an ordered categorical grouper will no longer sort the values. -group_keys : bool, optional +group_keys : bool, default True When calling apply and the ``by`` argument produces a like-indexed (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to index to identify pieces. By default group keys are not included when the result's index (and column) labels match the inputs, and - are included otherwise. This argument has no effect if the result produced - is not like-indexed with respect to the input. + are included otherwise. .. versionchanged:: 1.5.0 - Warns that `group_keys` will no longer be ignored when the + Warns that ``group_keys`` will no longer be ignored when the result from ``apply`` is a like-indexed Series or DataFrame. Specify ``group_keys`` explicitly to include the group keys or not. + .. versionchanged:: 2.0.0 + + ``group_keys`` now defaults to ``True``. + observed : bool, default False This only applies if any of the groupers are Categoricals. If True: only show observed values for categorical groupers. 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 7ce4f482b6414..b36a6295248cd 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -96,24 +96,22 @@ def test_resample_group_keys(): df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10)) expected = df.copy() - # no warning + # group_keys=False g = df.resample("5D", group_keys=False) - with tm.assert_produces_warning(None): - result = g.apply(lambda x: x) + result = g.apply(lambda x: x) tm.assert_frame_equal(result, expected) - # no warning, group keys - expected.index = pd.MultiIndex.from_arrays( - [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index] - ) - + # group_keys defaults to False g = df.resample("5D") result = g.apply(lambda x: x) tm.assert_frame_equal(result, expected) + # group_keys=True + expected.index = pd.MultiIndex.from_arrays( + [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index] + ) g = df.resample("5D", group_keys=True) - with tm.assert_produces_warning(None): - result = g.apply(lambda x: x) + result = g.apply(lambda x: x) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 425eef69c52ae..fdc09246b479a 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -280,7 +280,7 @@ def f(x): tm.assert_frame_equal(result, expected) # A case for series - expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f) + expected = df["col1"].groupby(pd.Grouper(freq="M"), group_keys=False).apply(f) result = df["col1"].resample("M").apply(f) tm.assert_series_equal(result, expected)