Skip to content

Commit 2d05eb5

Browse files
authored
REF: share MaskedArray reduction methods (#44790)
1 parent 04b538a commit 2d05eb5

File tree

6 files changed

+78 additions, -53 deletions

pandas/core/arrays/floating.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
DtypeObj,
1515
npt,
1616
)
17-
from pandas.compat.numpy import function as nv
1817
from pandas.util._decorators import cache_readonly
1918

2019
from pandas.core.dtypes.cast import astype_nansafe
@@ -338,22 +337,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
338337
def _values_for_argsort(self) -> np.ndarray:
339338
return self._data
340339

341-
def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
342-
nv.validate_sum((), kwargs)
343-
return super()._reduce("sum", skipna=skipna, min_count=min_count, axis=axis)
344-
345-
def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
346-
nv.validate_prod((), kwargs)
347-
return super()._reduce("prod", skipna=skipna, min_count=min_count, axis=axis)
348-
349-
def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
350-
nv.validate_min((), kwargs)
351-
return super()._reduce("min", skipna=skipna, axis=axis)
352-
353-
def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
354-
nv.validate_max((), kwargs)
355-
return super()._reduce("max", skipna=skipna, axis=axis)
356-
357340

358341
_dtype_docstring = """
359342
An ExtensionDtype for {dtype} data.

pandas/core/arrays/integer.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
DtypeObj,
1616
npt,
1717
)
18-
from pandas.compat.numpy import function as nv
1918
from pandas.util._decorators import cache_readonly
2019

2120
from pandas.core.dtypes.base import (
@@ -409,22 +408,6 @@ def _values_for_argsort(self) -> np.ndarray:
409408
data[self._mask] = data.min() - 1
410409
return data
411410

412-
def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
413-
nv.validate_sum((), kwargs)
414-
return super()._reduce("sum", skipna=skipna, min_count=min_count, axis=axis)
415-
416-
def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
417-
nv.validate_prod((), kwargs)
418-
return super()._reduce("prod", skipna=skipna, min_count=min_count, axis=axis)
419-
420-
def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
421-
nv.validate_min((), kwargs)
422-
return super()._reduce("min", skipna=skipna, axis=axis)
423-
424-
def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
425-
nv.validate_max((), kwargs)
426-
return super()._reduce("max", skipna=skipna, axis=axis)
427-
428411

429412
_dtype_docstring = """
430413
An ExtensionDtype for {dtype} integer data.

pandas/core/arrays/masked.py

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -783,13 +783,13 @@ def _quantile(
783783
return out
784784

785785
def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
786-
if name in {"any", "all"}:
786+
if name in {"any", "all", "min", "max", "sum", "prod"}:
787787
return getattr(self, name)(skipna=skipna, **kwargs)
788788

789789
data = self._data
790790
mask = self._mask
791791

792-
if name in {"sum", "prod", "min", "max", "mean"}:
792+
if name in {"mean"}:
793793
op = getattr(masked_reductions, name)
794794
result = op(data, mask, skipna=skipna, **kwargs)
795795
return result
@@ -799,6 +799,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
799799
if self._hasna:
800800
data = self.to_numpy("float64", na_value=np.nan)
801801

802+
# median, var, std, skew, kurt, idxmin, idxmax
802803
op = getattr(nanops, "nan" + name)
803804
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
804805

@@ -807,6 +808,70 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
807808

808809
return result
809810

811+
def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):
812+
if isinstance(result, np.ndarray):
813+
axis = kwargs["axis"]
814+
if skipna:
815+
# we only retain mask for all-NA rows/columns
816+
mask = self._mask.all(axis=axis)
817+
else:
818+
mask = self._mask.any(axis=axis)
819+
820+
return self._maybe_mask_result(result, mask, other=None, op_name=name)
821+
return result
822+
823+
def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
824+
nv.validate_sum((), kwargs)
825+
826+
# TODO: do this in validate_sum?
827+
if "out" in kwargs:
828+
# np.sum; test_floating_array_numpy_sum
829+
if kwargs["out"] is not None:
830+
raise NotImplementedError
831+
kwargs.pop("out")
832+
833+
result = masked_reductions.sum(
834+
self._data,
835+
self._mask,
836+
skipna=skipna,
837+
min_count=min_count,
838+
axis=axis,
839+
)
840+
return self._wrap_reduction_result(
841+
"sum", result, skipna=skipna, axis=axis, **kwargs
842+
)
843+
844+
def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
845+
nv.validate_prod((), kwargs)
846+
result = masked_reductions.prod(
847+
self._data,
848+
self._mask,
849+
skipna=skipna,
850+
min_count=min_count,
851+
axis=axis,
852+
)
853+
return self._wrap_reduction_result(
854+
"prod", result, skipna=skipna, axis=axis, **kwargs
855+
)
856+
857+
def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
858+
nv.validate_min((), kwargs)
859+
return masked_reductions.min(
860+
self._data,
861+
self._mask,
862+
skipna=skipna,
863+
axis=axis,
864+
)
865+
866+
def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
867+
nv.validate_max((), kwargs)
868+
return masked_reductions.max(
869+
self._data,
870+
self._mask,
871+
skipna=skipna,
872+
axis=axis,
873+
)
874+
810875
def any(self, *, skipna: bool = True, **kwargs):
811876
"""
812877
Return whether any element is truthy.

pandas/core/arrays/numeric.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -153,18 +153,6 @@ def _arith_method(self, other, op):
153153

154154
_HANDLED_TYPES = (np.ndarray, numbers.Number)
155155

156-
def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
157-
result = super()._reduce(name, skipna=skipna, **kwargs)
158-
if isinstance(result, np.ndarray):
159-
axis = kwargs["axis"]
160-
if skipna:
161-
# we only retain mask for all-NA rows/columns
162-
mask = self._mask.all(axis=axis)
163-
else:
164-
mask = self._mask.any(axis=axis)
165-
return type(self)(result, mask=mask)
166-
return result
167-
168156
def __neg__(self):
169157
return type(self)(-self._data, self._mask.copy())
170158

pandas/tests/extension/base/dim2.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,9 +206,9 @@ def test_reductions_2d_axis0(self, data, method, request):
206206
if method in ["mean", "median", "sum", "prod"]:
207207
# std and var are not dtype-preserving
208208
expected = data
209-
if method in ["sum", "prod"] and data.dtype.kind in ["i", "u"]:
209+
if method in ["sum", "prod"] and data.dtype.kind in "iub":
210210
# FIXME: kludge
211-
if data.dtype.kind == "i":
211+
if data.dtype.kind in ["i", "b"]:
212212
if is_platform_windows() or not IS64:
213213
# FIXME: kludge for 32bit builds
214214
if result.dtype.itemsize == 4:
@@ -217,7 +217,7 @@ def test_reductions_2d_axis0(self, data, method, request):
217217
dtype = pd.Int64Dtype()
218218
else:
219219
dtype = pd.Int64Dtype()
220-
else:
220+
elif data.dtype.kind == "u":
221221
if is_platform_windows() or not IS64:
222222
# FIXME: kludge for 32bit builds
223223
if result.dtype.itemsize == 4:
@@ -228,7 +228,11 @@ def test_reductions_2d_axis0(self, data, method, request):
228228
dtype = pd.UInt64Dtype()
229229

230230
expected = data.astype(dtype)
231-
assert type(expected) == type(data), type(expected)
231+
if data.dtype.kind == "b" and method in ["sum", "prod"]:
232+
# We get IntegerArray instead of BooleanArray
233+
pass
234+
else:
235+
assert type(expected) == type(data), type(expected)
232236
assert dtype == expected.dtype
233237

234238
self.assert_extension_array_equal(result, expected)

pandas/tests/reductions/test_reductions.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,9 @@ def test_sum_inf(self):
570570
res = nanops.nansum(arr, axis=1)
571571
assert np.isinf(res).all()
572572

573-
@pytest.mark.parametrize("dtype", ["float64", "Int64", "boolean", "object"])
573+
@pytest.mark.parametrize(
574+
"dtype", ["float64", "Float32", "Int64", "boolean", "object"]
575+
)
574576
@pytest.mark.parametrize("use_bottleneck", [True, False])
575577
@pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
576578
def test_empty(self, method, unit, use_bottleneck, dtype):

0 commit comments

Comments
 (0)