From 768f93cc925071621e4bdee3ea5e10c52dfd91a7 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 18 Feb 2020 12:52:45 -0800
Subject: [PATCH] REF: remove SeriesBinGrouper, SeriesGrouper

---
 pandas/_libs/reduction.pyx               | 247 -----------------------
 pandas/core/groupby/groupby.py           |  11 +-
 pandas/core/groupby/ops.py               |  50 +----
 pandas/tests/groupby/test_bin_groupby.py |  42 ----
 4 files changed, 3 insertions(+), 347 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index b27072aa66708..382a5c6692b55 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -147,253 +147,6 @@ cdef class Reducer:
         return result
 
 
-cdef class _BaseGrouper:
-    cdef _check_dummy(self, dummy):
-        # both values and index must be an ndarray!
-
-        values = dummy.values
-        # GH 23683: datetimetz types are equivalent to datetime types here
-        if (dummy.dtype != self.arr.dtype
-                and values.dtype != self.arr.dtype):
-            raise ValueError('Dummy array must be same dtype')
-        if util.is_array(values) and not values.flags.contiguous:
-            # e.g. Categorical has no `flags` attribute
-            values = values.copy()
-        index = dummy.index.values
-        if not index.flags.contiguous:
-            index = index.copy()
-
-        return values, index
-
-    cdef inline _update_cached_objs(self, object cached_typ, object cached_ityp,
-                                    Slider islider, Slider vslider):
-        if cached_typ is None:
-            cached_ityp = self.ityp(islider.buf)
-            cached_typ = self.typ(vslider.buf, index=cached_ityp, name=self.name)
-        else:
-            # See the comment in indexes/base.py about _index_data.
-            # We need this for EA-backed indexes that have a reference
-            # to a 1-d ndarray like datetime / timedelta / period.
-            object.__setattr__(cached_ityp, '_index_data', islider.buf)
-            cached_ityp._engine.clear_mapping()
-            object.__setattr__(cached_typ._data._block, 'values', vslider.buf)
-            object.__setattr__(cached_typ, '_index', cached_ityp)
-            object.__setattr__(cached_typ, 'name', self.name)
-
-        return cached_typ, cached_ityp
-
-    cdef inline object _apply_to_group(self,
-                                       object cached_typ, object cached_ityp,
-                                       Slider islider, Slider vslider,
-                                       Py_ssize_t group_size, bint initialized):
-        """
-        Call self.f on our new group, then update to the next group.
-        """
-        cached_ityp._engine.clear_mapping()
-        res = self.f(cached_typ)
-        res = _extract_result(res)
-        if not initialized:
-            # On the first pass, we check the output shape to see
-            #  if this looks like a reduction.
-            initialized = 1
-            _check_result_array(res, len(self.dummy_arr))
-
-        islider.advance(group_size)
-        vslider.advance(group_size)
-
-        return res, initialized
-
-
-cdef class SeriesBinGrouper(_BaseGrouper):
-    """
-    Performs grouping operation according to bin edges, rather than labels
-    """
-    cdef:
-        Py_ssize_t nresults, ngroups
-
-    cdef public:
-        ndarray arr, index, dummy_arr, dummy_index
-        object values, f, bins, typ, ityp, name
-
-    def __init__(self, object series, object f, object bins, object dummy):
-
-        assert dummy is not None  # always obj[:0]
-        assert len(bins) > 0  # otherwise we get IndexError in get_result
-
-        self.bins = bins
-        self.f = f
-
-        values = series.values
-        if util.is_array(values) and not values.flags.c_contiguous:
-            # e.g. Categorical has no `flags` attribute
-            values = values.copy('C')
-        self.arr = values
-        self.typ = series._constructor
-        self.ityp = series.index._constructor
-        self.index = series.index.values
-        self.name = series.name
-
-        self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
-
-        # kludge for #1688
-        if len(bins) > 0 and bins[-1] == len(series):
-            self.ngroups = len(bins)
-        else:
-            self.ngroups = len(bins) + 1
-
-    def get_result(self):
-        cdef:
-            ndarray arr, result
-            ndarray[int64_t] counts
-            Py_ssize_t i, n, group_size
-            object res
-            bint initialized = 0
-            Slider vslider, islider
-            object cached_typ = None, cached_ityp = None
-
-        counts = np.zeros(self.ngroups, dtype=np.int64)
-
-        if self.ngroups > 0:
-            counts[0] = self.bins[0]
-            for i in range(1, self.ngroups):
-                if i == self.ngroups - 1:
-                    counts[i] = len(self.arr) - self.bins[i - 1]
-                else:
-                    counts[i] = self.bins[i] - self.bins[i - 1]
-
-        group_size = 0
-        n = len(self.arr)
-
-        vslider = Slider(self.arr, self.dummy_arr)
-        islider = Slider(self.index, self.dummy_index)
-
-        result = np.empty(self.ngroups, dtype='O')
-
-        try:
-            for i in range(self.ngroups):
-                group_size = counts[i]
-
-                islider.set_length(group_size)
-                vslider.set_length(group_size)
-
-                cached_typ, cached_ityp = self._update_cached_objs(
-                    cached_typ, cached_ityp, islider, vslider)
-
-                res, initialized = self._apply_to_group(cached_typ, cached_ityp,
-                                                        islider, vslider,
-                                                        group_size, initialized)
-
-                result[i] = res
-
-        finally:
-            # so we don't free the wrong memory
-            islider.reset()
-            vslider.reset()
-
-        result = maybe_convert_objects(result)
-        return result, counts
-
-
-cdef class SeriesGrouper(_BaseGrouper):
-    """
-    Performs generic grouping operation while avoiding ndarray construction
-    overhead
-    """
-    cdef:
-        Py_ssize_t nresults, ngroups
-
-    cdef public:
-        ndarray arr, index, dummy_arr, dummy_index
-        object f, labels, values, typ, ityp, name
-
-    def __init__(self, object series, object f, object labels,
-                 Py_ssize_t ngroups, object dummy):
-
-        # in practice we always pass obj.iloc[:0] or equivalent
-        assert dummy is not None
-
-        if len(series) == 0:
-            # get_result would never assign `result`
-            raise ValueError("SeriesGrouper requires non-empty `series`")
-
-        self.labels = labels
-        self.f = f
-
-        values = series.values
-        if util.is_array(values) and not values.flags.c_contiguous:
-            # e.g. Categorical has no `flags` attribute
-            values = values.copy('C')
-        self.arr = values
-        self.typ = series._constructor
-        self.ityp = series.index._constructor
-        self.index = series.index.values
-        self.name = series.name
-
-        self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
-        self.ngroups = ngroups
-
-    def get_result(self):
-        cdef:
-            # Define result to avoid UnboundLocalError
-            ndarray arr, result = None
-            ndarray[int64_t] labels, counts
-            Py_ssize_t i, n, group_size, lab
-            object res
-            bint initialized = 0
-            Slider vslider, islider
-            object cached_typ = None, cached_ityp = None
-
-        labels = self.labels
-        counts = np.zeros(self.ngroups, dtype=np.int64)
-        group_size = 0
-        n = len(self.arr)
-
-        vslider = Slider(self.arr, self.dummy_arr)
-        islider = Slider(self.index, self.dummy_index)
-
-        result = np.empty(self.ngroups, dtype='O')
-
-        try:
-            for i in range(n):
-                group_size += 1
-
-                lab = labels[i]
-
-                if i == n - 1 or lab != labels[i + 1]:
-                    if lab == -1:
-                        islider.advance(group_size)
-                        vslider.advance(group_size)
-                        group_size = 0
-                        continue
-
-                    islider.set_length(group_size)
-                    vslider.set_length(group_size)
-
-                    cached_typ, cached_ityp = self._update_cached_objs(
-                        cached_typ, cached_ityp, islider, vslider)
-
-                    res, initialized = self._apply_to_group(cached_typ, cached_ityp,
-                                                            islider, vslider,
-                                                            group_size, initialized)
-
-                    result[lab] = res
-                    counts[lab] = group_size
-                    group_size = 0
-
-        finally:
-            # so we don't free the wrong memory
-            islider.reset()
-            vslider.reset()
-
-        # We check for empty series in the constructor, so should always
-        #  have result initialized by this point.
-        assert initialized, "`result` has not been initialized."
-
-        result = maybe_convert_objects(result)
-
-        return result, counts
-
-
 cdef inline _extract_result(object res, bint squeeze=True):
     """ extract the result object, it might be a 0-dim ndarray
         or a len-1 0-dim, or a scalar """
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index cc46485b4a2e8..f946f0e63a583 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -923,17 +923,10 @@ def _python_agg_general(self, func, *args, **kwargs):
 
             try:
                 # if this function is invalid for this dtype, we will ignore it.
-                func(obj[:0])
+                result, counts = self.grouper.agg_series(obj, f)
             except TypeError:
                 continue
-            except AssertionError:
-                raise
-            except Exception:
-                # Our function depends on having a non-empty argument
-                #  See test_groupby_agg_err_catching
-                pass
-
-            result, counts = self.grouper.agg_series(obj, f)
+
             assert result is not None
             key = base.OutputKey(label=name, position=idx)
             output[key] = self._try_cast(result, obj, numeric_only=True)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 7259268ac3f2b..2b89a51eb635c 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -621,50 +621,8 @@ def agg_series(self, obj: Series, func):
         # Caller is responsible for checking ngroups != 0
         assert self.ngroups != 0
 
-        if len(obj) == 0:
-            # SeriesGrouper would raise if we were to call _aggregate_series_fast
-            return self._aggregate_series_pure_python(obj, func)
-
-        elif is_extension_array_dtype(obj.dtype):
-            # _aggregate_series_fast would raise TypeError when
-            #  calling libreduction.Slider
-            # In the datetime64tz case it would incorrectly cast to tz-naive
-            # TODO: can we get a performant workaround for EAs backed by ndarray?
-            return self._aggregate_series_pure_python(obj, func)
-
-        elif obj.index._has_complex_internals:
-            # Pre-empt TypeError in _aggregate_series_fast
-            return self._aggregate_series_pure_python(obj, func)
-
-        try:
-            return self._aggregate_series_fast(obj, func)
-        except ValueError as err:
-            if "Function does not reduce" in str(err):
-                # raised in libreduction
-                pass
-            else:
-                raise
         return self._aggregate_series_pure_python(obj, func)
 
-    def _aggregate_series_fast(self, obj: Series, func):
-        # At this point we have already checked that
-        #  - obj.index is not a MultiIndex
-        #  - obj is backed by an ndarray, not ExtensionArray
-        #  - len(obj) > 0
-        #  - ngroups != 0
-        func = self._is_builtin_func(func)
-
-        group_index, _, ngroups = self.group_info
-
-        # avoids object / Series creation overhead
-        dummy = obj.iloc[:0]
-        indexer = get_group_index_sorter(group_index, ngroups)
-        obj = obj.take(indexer)
-        group_index = algorithms.take_nd(group_index, indexer, allow_fill=False)
-        grouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups, dummy)
-        result, counts = grouper.get_result()
-        return result, counts
-
     def _aggregate_series_pure_python(self, obj: Series, func):
 
         group_index, _, ngroups = self.group_info
@@ -856,13 +814,7 @@ def agg_series(self, obj: Series, func):
         assert self.ngroups != 0
         assert len(self.bins) > 0  # otherwise we'd get IndexError in get_result
 
-        if is_extension_array_dtype(obj.dtype):
-            # pre-empt SeriesBinGrouper from raising TypeError
-            return self._aggregate_series_pure_python(obj, func)
-
-        dummy = obj[:0]
-        grouper = libreduction.SeriesBinGrouper(obj, func, self.bins, dummy)
-        return grouper.get_result()
+        return self._aggregate_series_pure_python(obj, func)
 
 
 def _is_indexed_like(obj, axes) -> bool:
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index ff74d374e5e3f..5f20155fc46e4 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -9,48 +9,6 @@
 import pandas._testing as tm
 
 
-def test_series_grouper():
-    obj = Series(np.random.randn(10))
-    dummy = obj.iloc[:0]
-
-    labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64)
-
-    grouper = libreduction.SeriesGrouper(obj, np.mean, labels, 2, dummy)
-    result, counts = grouper.get_result()
-
-    expected = np.array([obj[3:6].mean(), obj[6:].mean()])
-    tm.assert_almost_equal(result, expected)
-
-    exp_counts = np.array([3, 4], dtype=np.int64)
-    tm.assert_almost_equal(counts, exp_counts)
-
-
-def test_series_grouper_requires_nonempty_raises():
-    # GH#29500
-    obj = Series(np.random.randn(10))
-    dummy = obj.iloc[:0]
-    labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64)
-
-    with pytest.raises(ValueError, match="SeriesGrouper requires non-empty `series`"):
-        libreduction.SeriesGrouper(dummy, np.mean, labels, 2, dummy)
-
-
-def test_series_bin_grouper():
-    obj = Series(np.random.randn(10))
-    dummy = obj[:0]
-
-    bins = np.array([3, 6])
-
-    grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins, dummy)
-    result, counts = grouper.get_result()
-
-    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()])
-    tm.assert_almost_equal(result, expected)
-
-    exp_counts = np.array([3, 3, 4], dtype=np.int64)
-    tm.assert_almost_equal(counts, exp_counts)
-
-
 @pytest.mark.parametrize(
     "binner,closed,expected",
     [