diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
index 7afaa12da12fb..4c868747fa930 100644
--- a/pandas/core/arrays/floating.py
+++ b/pandas/core/arrays/floating.py
@@ -14,7 +14,6 @@
     DtypeObj,
     npt,
 )
-from pandas.compat.numpy import function as nv
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.cast import astype_nansafe
@@ -338,22 +337,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
     def _values_for_argsort(self) -> np.ndarray:
         return self._data
 
-    def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
-        nv.validate_sum((), kwargs)
-        return super()._reduce("sum", skipna=skipna, min_count=min_count, axis=axis)
-
-    def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
-        nv.validate_prod((), kwargs)
-        return super()._reduce("prod", skipna=skipna, min_count=min_count, axis=axis)
-
-    def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
-        nv.validate_min((), kwargs)
-        return super()._reduce("min", skipna=skipna, axis=axis)
-
-    def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
-        nv.validate_max((), kwargs)
-        return super()._reduce("max", skipna=skipna, axis=axis)
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} data.
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index bbc1304d6596c..3587575503d33 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -15,7 +15,6 @@
     DtypeObj,
     npt,
 )
-from pandas.compat.numpy import function as nv
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.base import (
@@ -409,22 +408,6 @@ def _values_for_argsort(self) -> np.ndarray:
             data[self._mask] = data.min() - 1
         return data
 
-    def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
-        nv.validate_sum((), kwargs)
-        return super()._reduce("sum", skipna=skipna, min_count=min_count, axis=axis)
-
-    def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
-        nv.validate_prod((), kwargs)
-        return super()._reduce("prod", skipna=skipna, min_count=min_count, axis=axis)
-
-    def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
-        nv.validate_min((), kwargs)
-        return super()._reduce("min", skipna=skipna, axis=axis)
-
-    def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
-        nv.validate_max((), kwargs)
-        return super()._reduce("max", skipna=skipna, axis=axis)
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} integer data.
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index ef0b407eebd9e..7391ac3fe4c35 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -783,13 +783,13 @@ def _quantile(
         return out
 
     def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
-        if name in {"any", "all"}:
+        if name in {"any", "all", "min", "max", "sum", "prod"}:
             return getattr(self, name)(skipna=skipna, **kwargs)
 
         data = self._data
         mask = self._mask
 
-        if name in {"sum", "prod", "min", "max", "mean"}:
+        if name in {"mean"}:
             op = getattr(masked_reductions, name)
             result = op(data, mask, skipna=skipna, **kwargs)
             return result
@@ -799,6 +799,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
         if self._hasna:
             data = self.to_numpy("float64", na_value=np.nan)
 
+        # median, var, std, skew, kurt, idxmin, idxmax
         op = getattr(nanops, "nan" + name)
         result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
 
@@ -807,6 +808,70 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
 
         return result
 
+    def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):
+        if isinstance(result, np.ndarray):
+            axis = kwargs["axis"]
+            if skipna:
+                # we only retain mask for all-NA rows/columns
+                mask = self._mask.all(axis=axis)
+            else:
+                mask = self._mask.any(axis=axis)
+
+            return self._maybe_mask_result(result, mask, other=None, op_name=name)
+        return result
+
+    def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
+        nv.validate_sum((), kwargs)
+
+        # TODO: do this in validate_sum?
+        if "out" in kwargs:
+            # np.sum; test_floating_array_numpy_sum
+            if kwargs["out"] is not None:
+                raise NotImplementedError
+            kwargs.pop("out")
+
+        result = masked_reductions.sum(
+            self._data,
+            self._mask,
+            skipna=skipna,
+            min_count=min_count,
+            axis=axis,
+        )
+        return self._wrap_reduction_result(
+            "sum", result, skipna=skipna, axis=axis, **kwargs
+        )
+
+    def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
+        nv.validate_prod((), kwargs)
+        result = masked_reductions.prod(
+            self._data,
+            self._mask,
+            skipna=skipna,
+            min_count=min_count,
+            axis=axis,
+        )
+        return self._wrap_reduction_result(
+            "prod", result, skipna=skipna, axis=axis, **kwargs
+        )
+
+    def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
+        nv.validate_min((), kwargs)
+        return masked_reductions.min(
+            self._data,
+            self._mask,
+            skipna=skipna,
+            axis=axis,
+        )
+
+    def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
+        nv.validate_max((), kwargs)
+        return masked_reductions.max(
+            self._data,
+            self._mask,
+            skipna=skipna,
+            axis=axis,
+        )
+
     def any(self, *, skipna: bool = True, **kwargs):
         """
         Return whether any element is truthy.
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index fc42c11c510d9..bdd7f8c0d3c8c 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -153,18 +153,6 @@ def _arith_method(self, other, op):
 
     _HANDLED_TYPES = (np.ndarray, numbers.Number)
 
-    def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
-        result = super()._reduce(name, skipna=skipna, **kwargs)
-        if isinstance(result, np.ndarray):
-            axis = kwargs["axis"]
-            if skipna:
-                # we only retain mask for all-NA rows/columns
-                mask = self._mask.all(axis=axis)
-            else:
-                mask = self._mask.any(axis=axis)
-            return type(self)(result, mask=mask)
-        return result
-
     def __neg__(self):
         return type(self)(-self._data, self._mask.copy())
 
diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py
index a86c07c604320..aebb6233ba777 100644
--- a/pandas/tests/extension/base/dim2.py
+++ b/pandas/tests/extension/base/dim2.py
@@ -206,9 +206,9 @@ def test_reductions_2d_axis0(self, data, method, request):
         if method in ["mean", "median", "sum", "prod"]:
             # std and var are not dtype-preserving
             expected = data
-            if method in ["sum", "prod"] and data.dtype.kind in ["i", "u"]:
+            if method in ["sum", "prod"] and data.dtype.kind in "iub":
                 # FIXME: kludge
-                if data.dtype.kind == "i":
+                if data.dtype.kind in ["i", "b"]:
                     if is_platform_windows() or not IS64:
                         # FIXME: kludge for 32bit builds
                         if result.dtype.itemsize == 4:
@@ -217,7 +217,7 @@ def test_reductions_2d_axis0(self, data, method, request):
                             dtype = pd.Int64Dtype()
                     else:
                         dtype = pd.Int64Dtype()
-                else:
+                elif data.dtype.kind == "u":
                     if is_platform_windows() or not IS64:
                         # FIXME: kludge for 32bit builds
                         if result.dtype.itemsize == 4:
@@ -228,7 +228,11 @@ def test_reductions_2d_axis0(self, data, method, request):
                         dtype = pd.UInt64Dtype()
 
                 expected = data.astype(dtype)
-                assert type(expected) == type(data), type(expected)
+                if data.dtype.kind == "b" and method in ["sum", "prod"]:
+                    # We get IntegerArray instead of BooleanArray
+                    pass
+                else:
+                    assert type(expected) == type(data), type(expected)
                 assert dtype == expected.dtype
 
             self.assert_extension_array_equal(result, expected)
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index c2daac74fb4eb..49488e823d662 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -570,7 +570,9 @@ def test_sum_inf(self):
         res = nanops.nansum(arr, axis=1)
         assert np.isinf(res).all()
 
-    @pytest.mark.parametrize("dtype", ["float64", "Int64", "boolean", "object"])
+    @pytest.mark.parametrize(
+        "dtype", ["float64", "Float32", "Int64", "boolean", "object"]
+    )
     @pytest.mark.parametrize("use_bottleneck", [True, False])
     @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
     def test_empty(self, method, unit, use_bottleneck, dtype):