From 0c12f1353f291b32831eb97b14c4ed9433da16f3 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 7 May 2021 16:41:09 -0700
Subject: [PATCH 1/2] REF: document casting behavior in groupby

---
 pandas/core/groupby/generic.py | 10 +++-------
 pandas/core/groupby/groupby.py |  5 ++++-
 pandas/core/groupby/ops.py     | 25 ++++++++++++++++++++-----
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9287163053cac..ffadffd8e33e1 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -541,9 +541,6 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series:
             object.__setattr__(group, "name", name)
             res = func(group, *args, **kwargs)
 
-            if isinstance(res, (DataFrame, Series)):
-                res = res._values
-
             results.append(klass(res, index=group.index))
 
         # check for empty "results" to avoid concat ValueError
@@ -1236,12 +1233,11 @@ def _wrap_applied_output_series(
             columns = key_index
             stacked_values = stacked_values.T
 
+        if stacked_values.dtype == object:
+            # We'll have the DataFrame constructor do inference
+            stacked_values = stacked_values.tolist()
         result = self.obj._constructor(stacked_values, index=index, columns=columns)
 
-        # if we have date/time like in the original, then coerce dates
-        # as we are stacking can easily have object dtypes here
-        result = result._convert(datetime=True)
-
         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 1105c1bd1d782..c4d4132932f4f 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1331,7 +1331,10 @@ def _agg_py_fallback(
             #  reductions; see GH#28949
             ser = df.iloc[:, 0]
 
-        res_values = self.grouper.agg_series(ser, alt)
+        # We do not get here with UDFs, so we know that our dtype
+        #  should always be preserved by the implemented aggregations
+        # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
+        res_values = self.grouper.agg_series(ser, alt, preserve=True)
 
         if isinstance(values, Categorical):
             # Because we only get here with known dtype-preserving
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 46b47bc29d8a6..fe737e2b27aea 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -967,11 +967,22 @@ def _cython_operation(
         )
 
     @final
-    def agg_series(self, obj: Series, func: F) -> ArrayLike:
+    def agg_series(self, obj: Series, func: F, preserve: bool = False) -> ArrayLike:
+        """
+        Parameters
+        ----------
+        obj : Series
+        func : function taking a Series and returning a scalar-like
+        preserve : bool
+            Whether the aggregation is known to be dtype-preserving.
+
+        Returns
+        -------
+        np.ndarray or ExtensionArray
+        """
         # test_groupby_empty_with_category gets here with self.ngroups == 0
         #  and len(obj) > 0
 
-        cast_back = True
         if len(obj) == 0:
             # SeriesGrouper would raise if we were to call _aggregate_series_fast
             result = self._aggregate_series_pure_python(obj, func)
@@ -983,17 +994,21 @@ def agg_series(self, obj: Series, func: F) -> ArrayLike:
             # TODO: can we get a performant workaround for EAs backed by ndarray?
             result = self._aggregate_series_pure_python(obj, func)
 
+            # we can preserve dtype a little more aggressively with an EA dtype
+            #  because maybe_cast_pointwise_result will do a try/except
+            #  with _from_sequence.  NB we are assuming here that _from_sequence
+            #  is sufficiently strict that it casts appropriately.
+            preserve = True
+
         elif obj.index._has_complex_internals:
             # Preempt TypeError in _aggregate_series_fast
             result = self._aggregate_series_pure_python(obj, func)
 
         else:
             result = self._aggregate_series_fast(obj, func)
-            cast_back = False
 
         npvalues = lib.maybe_convert_objects(result, try_float=False)
-        if cast_back:
-            # TODO: Is there a documented reason why we dont always cast_back?
+        if preserve:
             out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
         else:
             out = npvalues

From a347b579ae8682b46303ac14c78aa90963303f52 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 17 May 2021 08:56:53 -0700
Subject: [PATCH 2/2] preserve -> preserve_dtype

---
 pandas/core/groupby/groupby.py |  2 +-
 pandas/core/groupby/ops.py     | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 7acd4c96bdf45..0b07668a9fea2 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1332,7 +1332,7 @@ def _agg_py_fallback(
         # We do not get here with UDFs, so we know that our dtype
         #  should always be preserved by the implemented aggregations
         # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
-        res_values = self.grouper.agg_series(ser, alt, preserve=True)
+        res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
 
         if isinstance(values, Categorical):
             # Because we only get here with known dtype-preserving
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 279c8759ec6da..8b6136b3abc42 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -966,13 +966,15 @@ def _cython_operation(
         )
 
     @final
-    def agg_series(self, obj: Series, func: F, preserve: bool = False) -> ArrayLike:
+    def agg_series(
+        self, obj: Series, func: F, preserve_dtype: bool = False
+    ) -> ArrayLike:
         """
         Parameters
         ----------
         obj : Series
         func : function taking a Series and returning a scalar-like
-        preserve : bool
+        preserve_dtype : bool
             Whether the aggregation is known to be dtype-preserving.
 
         Returns
@@ -997,7 +999,7 @@ def agg_series(self, obj: Series, func: F, preserve: bool = False) -> ArrayLike:
             #  because maybe_cast_pointwise_result will do a try/except
             #  with _from_sequence.  NB we are assuming here that _from_sequence
             #  is sufficiently strict that it casts appropriately.
-            preserve = True
+            preserve_dtype = True
 
         elif obj.index._has_complex_internals:
             # Preempt TypeError in _aggregate_series_fast
@@ -1007,7 +1009,7 @@ def agg_series(self, obj: Series, func: F, preserve: bool = False) -> ArrayLike:
             result = self._aggregate_series_fast(obj, func)
 
         npvalues = lib.maybe_convert_objects(result, try_float=False)
-        if preserve:
+        if preserve_dtype:
             out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
         else:
             out = npvalues