diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 4d3bdde357e88..c3faa00dce4c7 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -93,105 +93,6 @@ cdef class _BaseGrouper: return res, initialized -cdef class SeriesBinGrouper(_BaseGrouper): - """ - Performs grouping operation according to bin edges, rather than labels - """ - cdef: - Py_ssize_t nresults, ngroups - - cdef public: - ndarray bins # ndarray[int64_t] - ndarray arr, index, dummy_arr, dummy_index - object values, f, typ, ityp, name, idtype - - def __init__(self, object series, object f, ndarray[int64_t] bins): - - assert len(bins) > 0 # otherwise we get IndexError in get_result - - self.bins = bins - self.f = f - - values = series.values - if is_array(values) and not values.flags.c_contiguous: - # e.g. Categorical has no `flags` attribute - values = values.copy('C') - self.arr = values - self.typ = series._constructor - self.ityp = series.index._constructor - self.idtype = series.index.dtype - self.index = series.index.values - self.name = series.name - - dummy = series.iloc[:0] - self.dummy_arr, self.dummy_index = self._check_dummy(dummy) - - # kludge for #1688 - if len(bins) > 0 and bins[-1] == len(series): - self.ngroups = len(bins) - else: - # TODO: not reached except in test_series_bin_grouper directly - # constructing SeriesBinGrouper; can we rule this case out? - self.ngroups = len(bins) + 1 - - def get_result(self): - cdef: - ndarray arr, result - ndarray[int64_t] counts - Py_ssize_t i, n, group_size, start, end - object res - bint initialized = 0 - Slider vslider, islider - object cached_series = None, cached_index = None - - counts = np.zeros(self.ngroups, dtype=np.int64) - - if self.ngroups > 0: - counts[0] = self.bins[0] - for i in range(1, self.ngroups): - if i == self.ngroups - 1: - counts[i] = len(self.arr) - self.bins[i - 1] - else: - counts[i] = self.bins[i] - self.bins[i - 1] - - group_size = 0 - n = len(self.arr) - - vslider = Slider(self.arr, self.dummy_arr) - islider = Slider(self.index, self.dummy_index) - - result = np.empty(self.ngroups, dtype='O') - - cached_index, cached_series = self._init_dummy_series_and_index( - islider, vslider - ) - - start = 0 - try: - for i in range(self.ngroups): - group_size = counts[i] - end = start + group_size - - islider.move(start, end) - vslider.move(start, end) - - self._update_cached_objs( - cached_series, cached_index, islider, vslider) - - res, initialized = self._apply_to_group(cached_series, cached_index, - initialized) - start += group_size - - result[i] = res - - finally: - # so we don't free the wrong memory - islider.reset() - vslider.reset() - - return result, counts - - cdef class SeriesGrouper(_BaseGrouper): """ Performs generic grouping operation while avoiding ndarray construction diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f9ba34e916a04..45fab72669eb7 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -932,6 +932,11 @@ def agg_series( # Preempt TypeError in _aggregate_series_fast result = self._aggregate_series_pure_python(obj, func) + elif isinstance(self, BinGrouper): + # Not yet able to remove the BaseGrouper aggregate_series_fast, + # as test_crosstab.test_categorical breaks without it + result = self._aggregate_series_pure_python(obj, func) + else: result = self._aggregate_series_fast(obj, func) @@ -1149,15 +1154,9 @@ def groupings(self) -> list[grouper.Grouping]: def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray: # -> np.ndarray[object] - - # At this point we have already checked that - # - obj.index is not a MultiIndex - # - obj is backed by an ndarray, not ExtensionArray - # - ngroups != 0 - # - len(self.bins) > 0 - sbg = libreduction.SeriesBinGrouper(obj, func, self.bins) - result, _ = sbg.get_result() - return result + raise NotImplementedError( + "This should not be reached; use _aggregate_series_pure_python" + ) def _is_indexed_like(obj, axes, axis: int) -> bool: diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 92e5e709a9b2e..789c9cf33289e 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -53,21 +53,6 @@ def test_series_grouper_requires_nonempty_raises(): libreduction.SeriesGrouper(dummy, np.mean, labels, 2) -def test_series_bin_grouper(): - obj = Series(np.random.randn(10)) - - bins = np.array([3, 6], dtype=np.int64) - - grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins) - result, counts = grouper.get_result() - - expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()], dtype=object) - tm.assert_almost_equal(result, expected) - - exp_counts = np.array([3, 3, 4], dtype=np.int64) - tm.assert_almost_equal(counts, exp_counts) - - def assert_block_lengths(x): assert len(x) == len(x._mgr.blocks[0].mgr_locs) return 0