diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 4b6b71088cb7c..25b41b020aee6 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -335,10 +335,6 @@ cdef class Slider:
         self.buf.shape[0] = 0
 
 
-class InvalidApply(Exception):
-    pass
-
-
 def apply_frame_axis0(object frame, object f, object names,
                       const int64_t[:] starts, const int64_t[:] ends):
     cdef:
@@ -365,11 +361,7 @@ def apply_frame_axis0(object frame, object f, object names,
             chunk = slider.dummy
             object.__setattr__(chunk, 'name', names[i])
 
-            try:
-                piece = f(chunk)
-            except Exception as err:
-                # We can't be more specific without knowing something about `f`
-                raise InvalidApply("Let this error raise above us") from err
+            piece = f(chunk)
 
             # Need to infer if low level index slider will cause segfaults
             require_slow_apply = i == 0 and piece is chunk
@@ -406,7 +398,8 @@ cdef class BlockSlider:
     """
     cdef:
         object frame, dummy, index, block
-        list blk_values
+        list blocks, blk_values
+        ndarray orig_blklocs, orig_blknos
         ndarray values
         Slider idx_slider
         char **base_ptrs
@@ -418,6 +411,13 @@ cdef class BlockSlider:
         self.dummy = frame[:0]
         self.index = self.dummy.index
 
+        # GH#35417 attributes we need to restore at each step in case
+        #  the function modified them.
+        mgr = self.dummy._mgr
+        self.orig_blklocs = mgr.blklocs
+        self.orig_blknos = mgr.blknos
+        self.blocks = list(mgr.blocks)
+
         self.blk_values = [block.values for block in self.dummy._mgr.blocks]
 
         for values in self.blk_values:
@@ -441,6 +441,9 @@ cdef class BlockSlider:
         cdef:
             ndarray arr
             Py_ssize_t i
+
+        self._restore_blocks()
+
+        # move blocks
         for i in range(self.nblocks):
             arr = self.blk_values[i]
@@ -460,9 +463,24 @@ cdef class BlockSlider:
         cdef:
             ndarray arr
             Py_ssize_t i
+
+        self._restore_blocks()
+
         for i in range(self.nblocks):
             arr = self.blk_values[i]
 
             # axis=1 is the frame's axis=0
             arr.data = self.base_ptrs[i]
             arr.shape[1] = 0
+
+    cdef _restore_blocks(self):
+        """
+        Ensure that we have the original blocks, blknos, and blklocs.
+        """
+        mgr = self.dummy._mgr
+        # Hand the manager a fresh tuple rather than our own saved list, so
+        #  that in-place changes made to mgr.blocks by the applied function
+        #  cannot leak into the originals we restore from at the next step.
+        mgr.blocks = tuple(self.blocks)
+        mgr._blklocs = self.orig_blklocs
+        mgr._blknos = self.orig_blknos
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 7724e3930f7df..d1a4fc6fc74e5 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -202,13 +202,10 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
             try:
                 result_values, mutated = splitter.fast_apply(f, sdata, group_keys)
 
-            except libreduction.InvalidApply as err:
-                # This Exception is raised if `f` triggers an exception
-                # but it is preferable to raise the exception in Python.
-                if "Let this error raise above us" not in str(err):
-                    # TODO: can we infer anything about whether this is
-                    #  worth-retrying in pure-python?
-                    raise
+            except IndexError:
+                # This is a rare case in which re-running in python-space may
+                #  make a difference, see test_apply_mutate.test_mutate_groups
+                pass
 
             else:
                 # If the fast apply path could be used we can return here.