Skip to content

Commit 0aa48f7

Browse files
jbrockmendel authored and jreback committed
PERF: perform reductions block-wise (#29847)
1 parent 27b713b commit 0aa48f7

File tree

4 files changed

+48
-2
lines changed

4 files changed

+48
-2
lines changed

pandas/core/frame.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7741,6 +7741,26 @@ def _get_data(axis_matters):
77417741
raise NotImplementedError(msg)
77427742
return data
77437743

7744+
if numeric_only is not None and axis in [0, 1]:
7745+
df = self
7746+
if numeric_only is True:
7747+
df = _get_data(axis_matters=True)
7748+
if axis == 1:
7749+
df = df.T
7750+
axis = 0
7751+
7752+
out_dtype = "bool" if filter_type == "bool" else None
7753+
7754+
# After possibly _get_data and transposing, we are now in the
7755+
# simple case where we can use BlockManager._reduce
7756+
res = df._data.reduce(op, axis=1, skipna=skipna, **kwds)
7757+
assert isinstance(res, dict)
7758+
if len(res):
7759+
assert len(res) == max(list(res.keys())) + 1, res.keys()
7760+
out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype)
7761+
out.index = df.columns
7762+
return out
7763+
77447764
if numeric_only is None:
77457765
values = self.values
77467766
try:

pandas/core/internals/managers.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,32 @@ def _verify_integrity(self):
340340
f"tot_items: {tot_items}"
341341
)
342342

343+
def reduce(self, func, *args, **kwargs):
    """
    Apply a reduction function to every block and gather the results.

    Parameters
    ----------
    func : callable
        Invoked as ``func(blk.values, *args, **kwargs)`` for each block.
    *args, **kwargs
        Forwarded to ``func`` unchanged.

    Returns
    -------
    scalar or dict
        If ``self.ndim == 1``, whatever ``func`` returns for the values of
        the first block.  Otherwise a dict with one entry per element of
        each block's ``mgr_locs.as_array``, mapping that element to the
        matching reduced value.

    Raises
    ------
    AssertionError
        If a zero-dimensional block result comes from a block whose first
        dimension is not 1, if a non-scalar result is not one-dimensional
        or its length differs from the block's first dimension, or if two
        blocks produce the same key.
    """
    # If 2D, we assume that we're operating column-wise
    if self.ndim == 1:
        # we'll be returning a scalar
        blk = self.blocks[0]
        return func(blk.values, *args, **kwargs)

    res = {}
    for blk in self.blocks:
        bres = func(blk.values, *args, **kwargs)

        if np.ndim(bres) == 0:
            # EA
            # A scalar result is only meaningful for a single-row block, so
            # pair it with that block's lone location.
            assert blk.shape[0] == 1
            new_res = zip(blk.mgr_locs.as_array, [bres])
        else:
            # One reduced value per row of the block, paired positionally
            # with the block's locations.
            assert bres.ndim == 1, bres.shape
            assert blk.shape[0] == len(bres), (blk.shape, bres.shape, args, kwargs)
            new_res = zip(blk.mgr_locs.as_array, bres)

        nr = dict(new_res)
        # Each location may be reported by at most one block.
        assert not any(key in res for key in nr)
        res.update(nr)

    return res
368+
343369
def apply(self, f, filter=None, **kwargs):
344370
"""
345371
Iterate over the blocks, collect and create a new BlockManager.

pandas/core/nanops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ def reduction(values, axis=None, skipna=True, mask=None):
831831
try:
832832
result = getattr(values, meth)(axis, dtype=dtype_max)
833833
result.fill(np.nan)
834-
except (AttributeError, TypeError, ValueError, np.core._internal.AxisError):
834+
except (AttributeError, TypeError, ValueError):
835835
result = np.nan
836836
else:
837837
result = getattr(values, meth)(axis)

pandas/tests/groupby/test_groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,7 @@ def test_omit_nuisance(df):
785785

786786
# won't work with axis = 1
787787
grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1)
788-
msg = r"unsupported operand type\(s\) for \+: 'Timestamp'"
788+
msg = "reduction operation 'sum' not allowed for this dtype"
789789
with pytest.raises(TypeError, match=msg):
790790
grouped.agg(lambda x: x.sum(0, numeric_only=False))
791791

0 commit comments

Comments
 (0)