Skip to content

REF: share MaskedArray reduction methods #44790

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
DtypeObj,
npt,
)
from pandas.compat.numpy import function as nv
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.cast import astype_nansafe
Expand Down Expand Up @@ -338,22 +337,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
def _values_for_argsort(self) -> np.ndarray:
    """Return the underlying ``_data`` array unchanged."""
    values = self._data
    return values

def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Sum reduction routed through the parent class's ``_reduce``.

    ``kwargs`` is only checked by ``nv.validate_sum``; it is not forwarded.
    """
    nv.validate_sum((), kwargs)
    parent_reduce = super()._reduce
    return parent_reduce("sum", skipna=skipna, min_count=min_count, axis=axis)

def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Product reduction routed through the parent class's ``_reduce``.

    ``kwargs`` is only checked by ``nv.validate_prod``; it is not forwarded.
    """
    nv.validate_prod((), kwargs)
    parent_reduce = super()._reduce
    return parent_reduce("prod", skipna=skipna, min_count=min_count, axis=axis)

def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
    """
    Minimum reduction routed through the parent class's ``_reduce``.

    ``kwargs`` is only checked by ``nv.validate_min``; it is not forwarded.
    """
    nv.validate_min((), kwargs)
    parent_reduce = super()._reduce
    return parent_reduce("min", skipna=skipna, axis=axis)

def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
    """
    Maximum reduction routed through the parent class's ``_reduce``.

    ``kwargs`` is only checked by ``nv.validate_max``; it is not forwarded.
    """
    nv.validate_max((), kwargs)
    parent_reduce = super()._reduce
    return parent_reduce("max", skipna=skipna, axis=axis)


_dtype_docstring = """
An ExtensionDtype for {dtype} data.
Expand Down
17 changes: 0 additions & 17 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
DtypeObj,
npt,
)
from pandas.compat.numpy import function as nv
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.base import (
Expand Down Expand Up @@ -409,22 +408,6 @@ def _values_for_argsort(self) -> np.ndarray:
data[self._mask] = data.min() - 1
return data

def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Sum the array by delegating to the parent ``_reduce`` implementation.

    Any extra keyword arguments are validated with ``nv.validate_sum`` and
    then dropped.
    """
    nv.validate_sum((), kwargs)
    reduce_args = {"skipna": skipna, "min_count": min_count, "axis": axis}
    return super()._reduce("sum", **reduce_args)

def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Multiply the array by delegating to the parent ``_reduce`` implementation.

    Any extra keyword arguments are validated with ``nv.validate_prod`` and
    then dropped.
    """
    nv.validate_prod((), kwargs)
    reduce_args = {"skipna": skipna, "min_count": min_count, "axis": axis}
    return super()._reduce("prod", **reduce_args)

def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
    """
    Take the minimum by delegating to the parent ``_reduce`` implementation.

    Any extra keyword arguments are validated with ``nv.validate_min`` and
    then dropped.
    """
    nv.validate_min((), kwargs)
    reduce_args = {"skipna": skipna, "axis": axis}
    return super()._reduce("min", **reduce_args)

def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
    """
    Take the maximum by delegating to the parent ``_reduce`` implementation.

    Any extra keyword arguments are validated with ``nv.validate_max`` and
    then dropped.
    """
    nv.validate_max((), kwargs)
    reduce_args = {"skipna": skipna, "axis": axis}
    return super()._reduce("max", **reduce_args)


_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.
Expand Down
69 changes: 67 additions & 2 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,13 +783,13 @@ def _quantile(
return out

def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
if name in {"any", "all"}:
if name in {"any", "all", "min", "max", "sum", "prod"}:
return getattr(self, name)(skipna=skipna, **kwargs)

data = self._data
mask = self._mask

if name in {"sum", "prod", "min", "max", "mean"}:
if name in {"mean"}:
op = getattr(masked_reductions, name)
result = op(data, mask, skipna=skipna, **kwargs)
return result
Expand All @@ -799,6 +799,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
if self._hasna:
data = self.to_numpy("float64", na_value=np.nan)

# median, var, std, skew, kurt, idxmin, idxmax
op = getattr(nanops, "nan" + name)
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

Expand All @@ -807,6 +808,70 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):

return result

def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):
    """
    Attach a mask to an ndarray reduction result.

    A result that is not an ``np.ndarray`` is returned untouched. For an
    ndarray, ``self._mask`` is collapsed along ``kwargs["axis"]`` and the
    pair is handed to ``self._maybe_mask_result`` with ``op_name=name``.
    """
    if not isinstance(result, np.ndarray):
        return result

    axis = kwargs["axis"]
    # With skipna we only retain the mask for all-NA rows/columns;
    # without it, a single masked entry masks the whole row/column.
    collapse = self._mask.all if skipna else self._mask.any
    reduced_mask = collapse(axis=axis)
    return self._maybe_mask_result(result, reduced_mask, other=None, op_name=name)

def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Sum ``self._data`` under ``self._mask`` via ``masked_reductions.sum``.

    Extra keyword arguments are checked by ``nv.validate_sum``. An ``out``
    entry is discarded when it is ``None``; any other value raises
    ``NotImplementedError``. The raw result goes through
    ``self._wrap_reduction_result`` before being returned.
    """
    nv.validate_sum((), kwargs)

    # TODO: do this in validate_sum?
    # np.sum supplies ``out``; test_floating_array_numpy_sum
    out = kwargs.pop("out", None)
    if out is not None:
        raise NotImplementedError

    total = masked_reductions.sum(
        self._data, self._mask, skipna=skipna, min_count=min_count, axis=axis
    )
    return self._wrap_reduction_result(
        "sum", total, skipna=skipna, axis=axis, **kwargs
    )

def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Multiply ``self._data`` under ``self._mask`` via ``masked_reductions.prod``.

    Extra keyword arguments are checked by ``nv.validate_prod`` and passed
    on to ``self._wrap_reduction_result`` together with the raw result.
    """
    nv.validate_prod((), kwargs)
    product = masked_reductions.prod(
        self._data, self._mask, skipna=skipna, min_count=min_count, axis=axis
    )
    return self._wrap_reduction_result(
        "prod", product, skipna=skipna, axis=axis, **kwargs
    )

def min(self, *, skipna=True, axis: int | None = 0, **kwargs):
nv.validate_min((), kwargs)
return masked_reductions.min(
self._data,
self._mask,
skipna=skipna,
axis=axis,
)

def max(self, *, skipna=True, axis: int | None = 0, **kwargs):
nv.validate_max((), kwargs)
return masked_reductions.max(
self._data,
self._mask,
skipna=skipna,
axis=axis,
)

def any(self, *, skipna: bool = True, **kwargs):
"""
Return whether any element is truthy.
Expand Down
12 changes: 0 additions & 12 deletions pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,18 +153,6 @@ def _arith_method(self, other, op):

_HANDLED_TYPES = (np.ndarray, numbers.Number)

def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
    """
    Run the parent ``_reduce`` and re-wrap ndarray results in ``type(self)``.

    Non-ndarray results are returned as-is. For an ndarray result the new
    mask is ``self._mask`` collapsed along ``kwargs["axis"]``.
    """
    result = super()._reduce(name, skipna=skipna, **kwargs)
    if not isinstance(result, np.ndarray):
        return result

    axis = kwargs["axis"]
    # With skipna we only retain the mask for all-NA rows/columns.
    mask = self._mask.all(axis=axis) if skipna else self._mask.any(axis=axis)
    return type(self)(result, mask=mask)

def __neg__(self):
    """Return a new instance built from the negated data and a copy of the mask."""
    cls = type(self)
    negated = -self._data
    return cls(negated, self._mask.copy())

Expand Down
12 changes: 8 additions & 4 deletions pandas/tests/extension/base/dim2.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,9 @@ def test_reductions_2d_axis0(self, data, method, request):
if method in ["mean", "median", "sum", "prod"]:
# std and var are not dtype-preserving
expected = data
if method in ["sum", "prod"] and data.dtype.kind in ["i", "u"]:
if method in ["sum", "prod"] and data.dtype.kind in "iub":
# FIXME: kludge
if data.dtype.kind == "i":
if data.dtype.kind in ["i", "b"]:
if is_platform_windows() or not IS64:
# FIXME: kludge for 32bit builds
if result.dtype.itemsize == 4:
Expand All @@ -217,7 +217,7 @@ def test_reductions_2d_axis0(self, data, method, request):
dtype = pd.Int64Dtype()
else:
dtype = pd.Int64Dtype()
else:
elif data.dtype.kind == "u":
if is_platform_windows() or not IS64:
# FIXME: kludge for 32bit builds
if result.dtype.itemsize == 4:
Expand All @@ -228,7 +228,11 @@ def test_reductions_2d_axis0(self, data, method, request):
dtype = pd.UInt64Dtype()

expected = data.astype(dtype)
assert type(expected) == type(data), type(expected)
if data.dtype.kind == "b" and method in ["sum", "prod"]:
# We get IntegerArray instead of BooleanArray
pass
else:
assert type(expected) == type(data), type(expected)
assert dtype == expected.dtype

self.assert_extension_array_equal(result, expected)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,9 @@ def test_sum_inf(self):
res = nanops.nansum(arr, axis=1)
assert np.isinf(res).all()

@pytest.mark.parametrize("dtype", ["float64", "Int64", "boolean", "object"])
@pytest.mark.parametrize(
"dtype", ["float64", "Float32", "Int64", "boolean", "object"]
)
@pytest.mark.parametrize("use_bottleneck", [True, False])
@pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
def test_empty(self, method, unit, use_bottleneck, dtype):
Expand Down