pandas-dev · jreback · Nov 6, 2018 · Oct 24, 2018 · Oct 24, 2018 · Oct 24, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -850,6 +850,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).
 - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
 - :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`).
+- Bug when grouping :meth:`Dataframe.groupby()` on ``ExtensionArray`` not returning the actual ``ExtensionArray`` dtype (:issue:`23227`:).
 
 .. _whatsnew_0240.api.incompatibilities:
 

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -24,7 +24,8 @@ class providing the base-class of operations.
 from pandas.util._validators import validate_kwargs
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
-from pandas.core.dtypes.common import ensure_float, is_numeric_dtype, is_scalar
+from pandas.core.dtypes.common import (
+    ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar)
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas.core.algorithms as algorithms
@@ -754,7 +755,18 @@ def _try_cast(self, result, obj, numeric_only=False):
             dtype = obj.dtype
 
         if not is_scalar(result):
-            if numeric_only and is_numeric_dtype(dtype) or not numeric_only:
+            if is_extension_array_dtype(dtype):
+                # The function can return something of any type, so check
+                # if the type is compatible with the calling EA.
+                try:
+                    result = obj.values._from_sequence(result)
+                except Exception:
+                    # https://github.com/pandas-dev/pandas/issues/22850
+                    # pandas has no control over what 3rd-party ExtensionArrays
+                    # do in _values_from_sequence. We still want ops to work
+                    # though, so we catch any regular Exception.
+                    pass
+            elif numeric_only and is_numeric_dtype(dtype) or not numeric_only:
                 result = maybe_downcast_to_dtype(result, dtype)
 
         return result

diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
@@ -650,11 +650,9 @@ def test_preserve_dtypes(op):
 
     # groupby
     result = getattr(df.groupby("A"), op)()
-    expected = pd.DataFrame({
-        "B": np.array([1.0, 3.0]),
-        "C": np.array([1, 3], dtype="int64")
-    }, index=pd.Index(['a', 'b'], name='A'))
-    tm.assert_frame_equal(result, expected)
+
+    assert result.dtypes['B'].name == 'float64'
+    assert result.dtypes['C'].name == 'Int64'
 
 
 @pytest.mark.parametrize('op', ['mean'])
@@ -673,9 +671,10 @@ def test_reduce_to_float(op):
 
     # groupby
     result = getattr(df.groupby("A"), op)()
+
     expected = pd.DataFrame({
         "B": np.array([1.0, 3.0]),
-        "C": np.array([1, 3], dtype="float64")
+        "C": integer_array([1, 3], dtype="Int64")
     }, index=pd.Index(['a', 'b'], name='A'))
     tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py
@@ -26,25 +26,25 @@ def test_first_last_nth(self):
 
         # TODO: shouldn't these all be spares or not?
         tm.assert_frame_equal(sparse_grouped.first(),
-                              dense_grouped.first())
+                              dense_grouped.first().to_sparse())
         tm.assert_frame_equal(sparse_grouped.last(),
-                              dense_grouped.last())
+                              dense_grouped.last().to_sparse())
         tm.assert_frame_equal(sparse_grouped.nth(1),
                               dense_grouped.nth(1).to_sparse())
 
     def test_aggfuncs(self):
         sparse_grouped = self.sparse.groupby('A')
         dense_grouped = self.dense.groupby('A')
 
-        tm.assert_frame_equal(sparse_grouped.mean(),
-                              dense_grouped.mean())
+        tm.assert_frame_equal(sparse_grouped.mean().to_sparse(),
+                              dense_grouped.mean().to_sparse())
 
         # ToDo: sparse sum includes str column
         # tm.assert_frame_equal(sparse_grouped.sum(),
         #                       dense_grouped.sum())
 
-        tm.assert_frame_equal(sparse_grouped.count(),
-                              dense_grouped.count())
+        tm.assert_frame_equal(sparse_grouped.count().to_sparse(),
+                              dense_grouped.count().to_sparse())
 
 
 @pytest.mark.parametrize("fill_value", [0, np.nan])
@@ -55,5 +55,5 @@ def test_groupby_includes_fill_value(fill_value):
     sdf = df.to_sparse(fill_value=fill_value)
     result = sdf.groupby('a').sum()
     expected = df.groupby('a').sum()
-    tm.assert_frame_equal(result, expected,
+    tm.assert_frame_equal(result, expected.to_sparse(fill_value=fill_value),
                           check_index_type=False)
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
@@ -1576,6 +1576,7 @@ def test_resample_categorical_data_with_timedeltaindex(self):
                               'Group': ['A', 'A']},
                              index=pd.to_timedelta([0, 10], unit='s'))
         expected = expected.reindex(['Group_obj', 'Group'], axis=1)
+        expected['Group'] = expected['Group_obj'].astype('category')
         tm.assert_frame_equal(result, expected)
 
     def test_resample_daily_anchored(self):