pandas-dev · jreback · Mar 30, 2022 · Jun 25, 2020 · Jun 26, 2020 · Jun 26, 2020
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -1019,7 +1019,7 @@ The dimension of the returned result can also change:
 
 .. ipython::
 
-    In [8]: grouped = df.groupby('A')['C']
+    In [8]: grouped = df.groupby('A', group_keys=False)['C']
 
     In [10]: def f(group):
        ....:     return pd.DataFrame({'original': group,

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -342,10 +342,15 @@ Now every group is evaluated only a single time.
 
 *New behavior*:
 
-.. ipython:: python
-
-    df.groupby("a").apply(func)
+.. code-block:: python
 
+   In [3]: df.groupby('a').apply(func)
+   x
+   y
+   Out[3]:
+      a  b
+   0  x  1
+   1  y  2
 
 Concatenating sparse values
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -780,6 +780,45 @@ Development Changes
 Deprecations
 ~~~~~~~~~~~~
 
+:meth:`~DataFrame.groupby` no longer ignores ``group_keys`` for transform-like ``apply``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:meth:`~DataFrame.groupby` will no longer ignore the ``group_keys`` argument for functions passed to ``apply`` that return like-indexed outputs (:issue:`34809`).
+Previous versions of pandas would add the group keys only when the result from the applied function had a different index to the input.
+
+.. code-block:: python
+
+   >>> # pandas 1.0.4
+   >>> df = pd.DataFrame({"A": [1, 2, 2], "B": [1, 2, 3]})
+   >>> df
+      A  B
+   0  1  1
+   1  2  2
+   2  2  3
+   >>> df.groupby("A").apply(lambda x: x.rename(np.exp))  # Different index
+               A  B
+   A
+   1 1.000000  1  1
+   2 2.718282  2  2
+     7.389056  2  3
+
+   >>> df.groupby("A").apply(lambda x: x)  # Same index
+      A  B
+   0  1  1
+   1  2  2
+   2  2  3
+
+In the future this behavior will change to always respect ``group_keys``, which defaults to True.
+
+.. ipython:: python
+   :okwarning:
+
+   df = pd.DataFrame({"A": [1, 2, 2], "B": [1, 2, 3]})
+   df.groupby("A").apply(lambda x: x)
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
+
 - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version.  Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
 
 - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
@@ -370,7 +370,16 @@ def apply_frame_axis0(object frame, object f, object names,
                     mutated = True
             except AttributeError:
                 # `piece` might not have an index, could be e.g. an int
-                pass
+                # By definition, we are not a transform, so set mutated
+                # to True
+                mutated = True
+            if not mutated:
+                # Also check if the columns are mutated
+                try:
+                    if not piece.columns.equals(chunk.columns):
+                        mutated = True
+                except AttributeError:
+                    mutated = True
 
             if not is_scalar(piece):
                 # Need to copy data to avoid appending references

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6420,6 +6420,26 @@ def update(
 a   13.0   13.0
 b   12.3  123.0
 NaN 12.3   33.0
+
+To exclude or include the group keys in the index, specify ``group_keys``:
+
+>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
+...                               'Parrot', 'Parrot'],
+...                    'Max Speed': [380., 370., 24., 26.]})
+>>> df.groupby("Animal", group_keys=False).apply(lambda x: x)
+   Animal  Max Speed
+0  Falcon      380.0
+1  Falcon      370.0
+2  Parrot       24.0
+3  Parrot       26.0
+
+>>> df.groupby("Animal", group_keys=True).apply(lambda x: x)
+          Animal  Max Speed
+Animal
+Falcon 0  Falcon      380.0
+       1  Falcon      370.0
+Parrot 2  Parrot       24.0
+       3  Parrot       26.0
 """
     )
     @Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
@@ -6430,7 +6450,7 @@ def groupby(
         level=None,
         as_index: bool = True,
         sort: bool = True,
-        group_keys: bool = True,
+        group_keys: bool = no_default,
         squeeze: bool = no_default,
         observed: bool = False,
         dropna: bool = True,

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -7692,6 +7692,7 @@ def resample(
         level=None,
         origin: Union[str, TimestampConvertibleTypes] = "start_day",
         offset: Optional[TimedeltaConvertibleTypes] = None,
+        group_keys: bool_t = lib.no_default,
     ) -> "Resampler":
         """
         Resample time-series data.
@@ -7761,6 +7762,12 @@ def resample(
 
             .. versionadded:: 1.1.0
 
+        group_keys : bool, default True
+            Whether to include the group keys in the result index when using
+            ``.apply()`` on the resampled object.
+
+            .. versionadded:: 1.1.0
+
         Returns
         -------
         Resampler object
@@ -8077,6 +8084,7 @@ def resample(
             level=level,
             origin=origin,
             offset=offset,
+            group_keys=group_keys,
         )
 
     def first(self: FrameOrSeries, offset) -> FrameOrSeries:

@@ -30,7 +30,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import FrameOrSeries
+from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -413,7 +413,14 @@ def _wrap_transformed_output(
         assert isinstance(result, Series)
         return result
 
-    def _wrap_applied_output(self, keys, values, not_indexed_same=False):
+    def _wrap_applied_output(
+        self,
+        keys,
+        values,
+        not_indexed_same: bool = False,
+        override_group_keys: bool = False,
+    ) -> FrameOrSeriesUnion:
+        result: FrameOrSeriesUnion
         if len(keys) == 0:
             # GH #6265
             return self.obj._constructor(
@@ -440,10 +447,20 @@ def _get_index() -> Index:
             return result
 
         if isinstance(values[0], Series):
-            return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
+            return self._concat_objects(
+                keys,
+                values,
+                not_indexed_same=not_indexed_same,
+                override_group_keys=override_group_keys,
+            )
         elif isinstance(values[0], DataFrame):
             # possible that Series -> DataFrame by applied function
-            return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
+            return self._concat_objects(
+                keys,
+                values,
+                not_indexed_same=not_indexed_same,
+                override_group_keys=override_group_keys,
+            )
         else:
             # GH #6265 #24880
             result = self.obj._constructor(
@@ -1203,7 +1220,13 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame:
 
         return self.obj._constructor(result, columns=result_columns)
 
-    def _wrap_applied_output(self, keys, values, not_indexed_same=False):
+    def _wrap_applied_output(
+        self,
+        keys,
+        values,
+        not_indexed_same: bool = False,
+        override_group_keys: bool = False,
+    ) -> FrameOrSeriesUnion:
         if len(keys) == 0:
             return self.obj._constructor(index=keys)
 
@@ -1217,7 +1240,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
             # We'd prefer it return an empty dataframe.
             return self.obj._constructor()
         elif isinstance(first_not_none, DataFrame):
-            return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
+            return self._concat_objects(
+                keys,
+                values,
+                not_indexed_same=not_indexed_same,
+                override_group_keys=override_group_keys,
+            )
         else:
             if len(self.grouper.groupings) > 1:
                 key_index = self.grouper.result_index
@@ -1247,14 +1275,16 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
             # make Nones an empty object
             if first_not_none is None:
                 return self.obj._constructor()
-            elif isinstance(first_not_none, NDFrame):
+            elif isinstance(first_not_none, (Series, DataFrame)):
 
                 # this is to silence a DeprecationWarning
                 # TODO: Remove when default dtype of empty Series is object
                 kwargs = first_not_none._construct_axes_dict()
+                backup: FrameOrSeriesUnion
                 if isinstance(first_not_none, Series):
+                    kwargs["dtype_if_empty"] = object
                     backup = create_series_with_explicit_dtype(
-                        **kwargs, dtype_if_empty=object
+                        **kwargs,
                     )
                 else:
                     backup = first_not_none._constructor(**kwargs)
@@ -1284,7 +1314,10 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                             # OR we don't have a multi-index and we have only a
                             # single values
                             return self._concat_objects(
-                                keys, values, not_indexed_same=not_indexed_same
+                                keys,
+                                values,
+                                not_indexed_same=not_indexed_same,
+                                override_group_keys=override_group_keys,
                             )
 
                         # still a series
@@ -1296,7 +1329,12 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
 
                     if not all_indexed_same:
                         # GH 8467
-                        return self._concat_objects(keys, values, not_indexed_same=True)
+                        return self._concat_objects(
+                            keys,
+                            values,
+                            not_indexed_same=True,
+                            override_group_keys=override_group_keys,
+                        )
 
                 if self.axis == 0 and isinstance(v, ABCSeries):
                     # GH6124 if the list of Series have a consistent name,
@@ -1668,12 +1706,17 @@ def _gotitem(self, key, ndim: int, subset=None):
                 exclusions=self.exclusions,
                 as_index=self.as_index,
                 observed=self.observed,
+                group_keys=self.group_keys,
             )
         elif ndim == 1:
             if subset is None:
                 subset = self.obj[key]
             return SeriesGroupBy(
-                subset, selection=key, grouper=self.grouper, observed=self.observed
+                subset,
+                selection=key,
+                grouper=self.grouper,
+                observed=self.observed,
+                group_keys=self.group_keys,
             )
 
         raise AssertionError("invalid ndim for _gotitem")