REF: remove libreduction.SeriesBinGrouper #43189

Merged · 1 commit · Aug 31, 2021
99 changes: 0 additions & 99 deletions pandas/_libs/reduction.pyx
@@ -93,105 +93,6 @@ cdef class _BaseGrouper:
        return res, initialized


cdef class SeriesBinGrouper(_BaseGrouper):
    """
    Performs grouping operation according to bin edges, rather than labels
    """
    cdef:
        Py_ssize_t nresults, ngroups

    cdef public:
        ndarray bins  # ndarray[int64_t]
        ndarray arr, index, dummy_arr, dummy_index
        object values, f, typ, ityp, name, idtype

    def __init__(self, object series, object f, ndarray[int64_t] bins):

        assert len(bins) > 0  # otherwise we get IndexError in get_result

        self.bins = bins
        self.f = f

        values = series.values
        if is_array(values) and not values.flags.c_contiguous:
            # e.g. Categorical has no `flags` attribute
            values = values.copy('C')
        self.arr = values
        self.typ = series._constructor
        self.ityp = series.index._constructor
        self.idtype = series.index.dtype
        self.index = series.index.values
        self.name = series.name

        dummy = series.iloc[:0]
        self.dummy_arr, self.dummy_index = self._check_dummy(dummy)

        # kludge for #1688
        if len(bins) > 0 and bins[-1] == len(series):
            self.ngroups = len(bins)
        else:
            # TODO: not reached except in test_series_bin_grouper directly
            #  constructing SeriesBinGrouper; can we rule this case out?
            self.ngroups = len(bins) + 1

    def get_result(self):
        cdef:
            ndarray arr, result
            ndarray[int64_t] counts
            Py_ssize_t i, n, group_size, start, end
            object res
            bint initialized = 0
            Slider vslider, islider
            object cached_series = None, cached_index = None

        counts = np.zeros(self.ngroups, dtype=np.int64)

        if self.ngroups > 0:
            counts[0] = self.bins[0]
            for i in range(1, self.ngroups):
                if i == self.ngroups - 1:
                    counts[i] = len(self.arr) - self.bins[i - 1]
                else:
                    counts[i] = self.bins[i] - self.bins[i - 1]

        group_size = 0
        n = len(self.arr)

        vslider = Slider(self.arr, self.dummy_arr)
        islider = Slider(self.index, self.dummy_index)

        result = np.empty(self.ngroups, dtype='O')

        cached_index, cached_series = self._init_dummy_series_and_index(
            islider, vslider
        )

        start = 0
        try:
            for i in range(self.ngroups):
                group_size = counts[i]
                end = start + group_size

                islider.move(start, end)
                vslider.move(start, end)

                self._update_cached_objs(
                    cached_series, cached_index, islider, vslider)

                res, initialized = self._apply_to_group(cached_series, cached_index,
                                                        initialized)
                start += group_size

                result[i] = res

        finally:
            # so we don't free the wrong memory
            islider.reset()
            vslider.reset()

        return result, counts


cdef class SeriesGrouper(_BaseGrouper):
    """
    Performs generic grouping operation while avoiding ndarray construction
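For orientation, the class removed above applied a user function to consecutive slices of a Series delimited by positional bin edges, returning one result and one count per bin. A minimal pure-Python sketch of that behavior (a hypothetical helper written for illustration, not part of pandas; the Cython version additionally reused cached Series objects for speed):

import numpy as np
import pandas as pd

def apply_over_bins(ser: pd.Series, func, bins: np.ndarray):
    # Positional edges: group i is ser[edges[i]:edges[i + 1]).
    edges = [0, *bins.tolist()]
    if edges[-1] != len(ser):
        # mirrors the ngroups = len(bins) + 1 branch above
        edges.append(len(ser))
    results = np.empty(len(edges) - 1, dtype=object)
    counts = np.zeros(len(edges) - 1, dtype=np.int64)
    for i, (start, end) in enumerate(zip(edges[:-1], edges[1:])):
        counts[i] = end - start
        results[i] = func(ser.iloc[start:end])
    return results, counts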
17 changes: 8 additions & 9 deletions pandas/core/groupby/ops.py
@@ -932,6 +932,11 @@ def agg_series(
            # Preempt TypeError in _aggregate_series_fast
            result = self._aggregate_series_pure_python(obj, func)

        elif isinstance(self, BinGrouper):
            # Not yet able to remove the BaseGrouper aggregate_series_fast,
            # as test_crosstab.test_categorical breaks without it
            result = self._aggregate_series_pure_python(obj, func)

        else:
            result = self._aggregate_series_fast(obj, func)
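BinGrouper backs the bin-edge groupings used by operations such as resample, so after this change user-defined aggregations over such bins take the pure-Python loop instead of SeriesBinGrouper. A hedged, user-level illustration of the kind of call expected to exercise this branch (whether a particular call actually reaches agg_series depends on dispatch elsewhere in groupby/resample, so treat the routing as an assumption):

import numpy as np
import pandas as pd

ser = pd.Series(
    np.arange(6, dtype="float64"),
    index=pd.date_range("2021-01-01", periods=6, freq="D"),
)
# A Python callable (no Cython fast path) aggregated over time bins.
out = ser.resample("2D").apply(lambda x: x.max() - x.min())
print(out)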

@@ -1149,15 +1154,9 @@ def groupings(self) -> list[grouper.Grouping]:

    def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray:
        # -> np.ndarray[object]

        # At this point we have already checked that
        # - obj.index is not a MultiIndex
        # - obj is backed by an ndarray, not ExtensionArray
        # - ngroups != 0
        # - len(self.bins) > 0
        sbg = libreduction.SeriesBinGrouper(obj, func, self.bins)
        result, _ = sbg.get_result()
        return result
        raise NotImplementedError(
            "This should not be reached; use _aggregate_series_pure_python"
        )


def _is_indexed_like(obj, axes, axis: int) -> bool:
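Because the new elif in agg_series routes every BinGrouper through _aggregate_series_pure_python, the NotImplementedError above is only a guard against internal callers bypassing that dispatch. A rough sanity check along those lines, using internal objects whose constructor signature (BinGrouper(bins, binlabels)) is assumed from this development version and may differ in other releases:

import numpy as np
import pandas as pd
from pandas.core.groupby.ops import BinGrouper

ser = pd.Series(np.arange(10, dtype="float64"))
grouper = BinGrouper(
    bins=np.array([3, 6, 10], dtype=np.int64),
    binlabels=pd.Index([0, 1, 2]),
)
try:
    grouper._aggregate_series_fast(ser, np.mean)
except NotImplementedError:
    pass  # expected: the Cython fast path is intentionally disabled for BinGrouper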
15 changes: 0 additions & 15 deletions pandas/tests/groupby/test_bin_groupby.py
@@ -53,21 +53,6 @@ def test_series_grouper_requires_nonempty_raises():
        libreduction.SeriesGrouper(dummy, np.mean, labels, 2)


def test_series_bin_grouper():
    obj = Series(np.random.randn(10))

    bins = np.array([3, 6], dtype=np.int64)

    grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins)
    result, counts = grouper.get_result()

    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()], dtype=object)
    tm.assert_almost_equal(result, expected)

    exp_counts = np.array([3, 3, 4], dtype=np.int64)
    tm.assert_almost_equal(counts, exp_counts)


def assert_block_lengths(x):
    assert len(x) == len(x._mgr.blocks[0].mgr_locs)
    return 0
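The deleted test exercised libreduction.SeriesBinGrouper directly, which no longer exists after this PR. If equivalent coverage were wanted without the Cython class, the same expectation can be written against plain NumPy slicing; a sketch (hypothetical test, not added by this PR):

import numpy as np
import pandas as pd
import pandas._testing as tm

def test_bin_edge_means_sketch():
    obj = pd.Series(np.random.randn(10))
    bins = np.array([3, 6], dtype=np.int64)

    # np.split at the positional edges reproduces the groups the removed
    # SeriesBinGrouper iterated over: obj[:3], obj[3:6], obj[6:]
    groups = np.split(obj.to_numpy(), bins)
    result = np.array([g.mean() for g in groups], dtype=object)
    counts = np.array([len(g) for g in groups], dtype=np.int64)

    expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()], dtype=object)
    tm.assert_almost_equal(result, expected)
    tm.assert_almost_equal(counts, np.array([3, 3, 4], dtype=np.int64))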