Skip to content

Commit 069554a

Browse files
authored
PERF: DataFrame reductions with axis=None and EA dtypes (#54308)
* faster DataFrame reductions with axis=None and EA dtypes
* whatsnew
* clean test
1 parent ed487e0 commit 069554a

File tree

4 files changed

+12
-4
lines changed

4 files changed

+12
-4
lines changed

asv_bench/benchmarks/stat_ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77

88
class FrameOps:
9-
params = [ops, ["float", "int", "Int64"], [0, 1]]
9+
params = [ops, ["float", "int", "Int64"], [0, 1, None]]
1010
param_names = ["op", "dtype", "axis"]
1111

1212
def setup(self, op, dtype, axis):

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ Performance improvements
447447
- Performance improvement in :func:`concat` (:issue:`52291`, :issue:`52290`)
448448
- :class:`Period`'s default formatter (``period_format``) is now significantly (~twice) faster. This improves performance of ``str(Period)``, ``repr(Period)``, and ``Period.strftime(fmt=None)``, as well as ``PeriodArray.strftime(fmt=None)``, ``PeriodIndex.strftime(fmt=None)`` and ``PeriodIndex.format(fmt=None)``. Finally, ``to_csv`` operations involving :class:`PeriodArray` or :class:`PeriodIndex` with default ``date_format`` are also significantly accelerated. (:issue:`51459`)
449449
- Performance improvement accessing :attr:`arrays.IntegerArray.dtype` & :attr:`arrays.FloatingArray.dtype` (:issue:`52998`)
450+
- Performance improvement in :class:`DataFrame` reductions with ``axis=None`` and extension dtypes (:issue:`54308`)
450451
- Performance improvement in :class:`MultiIndex` and multi-column operations (e.g. :meth:`DataFrame.sort_values`, :meth:`DataFrame.groupby`, :meth:`Series.unstack`) when index/column values are already sorted (:issue:`53806`)
451452
- Performance improvement in :class:`Series` reductions (:issue:`52341`)
452453
- Performance improvement in :func:`concat` when ``axis=1`` and objects have different indexes (:issue:`52541`)

pandas/core/frame.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
needs_i8_conversion,
107107
pandas_dtype,
108108
)
109+
from pandas.core.dtypes.concat import concat_compat
109110
from pandas.core.dtypes.dtypes import (
110111
ArrowDtype,
111112
BaseMaskedDtype,
@@ -11063,6 +11064,11 @@ def _get_data() -> DataFrame:
1106311064
if numeric_only:
1106411065
df = _get_data()
1106511066
if axis is None:
11067+
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
11068+
if isinstance(dtype, ExtensionDtype):
11069+
df = df.astype(dtype, copy=False)
11070+
arr = concat_compat(list(df._iter_column_arrays()))
11071+
return arr._reduce(name, skipna=skipna, keepdims=False, **kwds)
1106611072
return func(df.values)
1106711073
elif axis == 1:
1106811074
if len(df.index) == 0:

pandas/tests/frame/test_reductions.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1868,13 +1868,14 @@ def test_prod_sum_min_count_mixed_object():
18681868

18691869
@pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"])
18701870
@pytest.mark.parametrize("numeric_only", [True, False])
1871-
def test_reduction_axis_none_returns_scalar(method, numeric_only):
1871+
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
1872+
def test_reduction_axis_none_returns_scalar(method, numeric_only, dtype):
18721873
# GH#21597 As of 2.0, axis=None reduces over all axes.
18731874

1874-
df = DataFrame(np.random.randn(4, 4))
1875+
df = DataFrame(np.random.randn(4, 4), dtype=dtype)
18751876

18761877
result = getattr(df, method)(axis=None, numeric_only=numeric_only)
1877-
np_arr = df.to_numpy()
1878+
np_arr = df.to_numpy(dtype=np.float64)
18781879
if method in {"skew", "kurt"}:
18791880
comp_mod = pytest.importorskip("scipy.stats")
18801881
if method == "kurt":

0 commit comments

Comments
 (0)