Skip to content

BUG: SparseArray.min/max skipna #44829

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on Dec 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 22 additions & 15 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
@@ -1491,69 +1491,76 @@ def mean(self, axis=0, *args, **kwargs):
nsparse = self.sp_index.ngaps
return (sp_sum + self.fill_value * nsparse) / (ct + nsparse)

def max(self, axis: int = 0, *args, **kwargs) -> Scalar:
def max(self, *, axis: int | None = None, skipna: bool = True):
"""
Max of non-NA/null values
Max of array values, ignoring NA values if specified.

Parameters
----------
axis : int, default 0
Not Used. NumPy compatibility.
*args, **kwargs
Not Used. NumPy compatibility.
skipna : bool, default True
Whether to ignore NA values.

Returns
-------
scalar
"""
nv.validate_max(args, kwargs)
return self._min_max("max")
nv.validate_minmax_axis(axis, self.ndim)
return self._min_max("max", skipna=skipna)

def min(self, axis: int = 0, *args, **kwargs) -> Scalar:
def min(self, *, axis: int | None = None, skipna: bool = True):
"""
Min of non-NA/null values
Min of array values, ignoring NA values if specified.

Parameters
----------
axis : int, default 0
Not Used. NumPy compatibility.
*args, **kwargs
Not Used. NumPy compatibility.
skipna : bool, default True
Whether to ignore NA values.

Returns
-------
scalar
"""
nv.validate_min(args, kwargs)
return self._min_max("min")
nv.validate_minmax_axis(axis, self.ndim)
return self._min_max("min", skipna=skipna)

def _min_max(self, kind: Literal["min", "max"]) -> Scalar:
def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar:
"""
Min/max of non-NA/null values

Parameters
----------
kind : {"min", "max"}
skipna : bool

Returns
-------
scalar
"""
valid_vals = self._valid_sp_values
has_nonnull_fill_vals = not self._null_fill_value and self.sp_index.ngaps > 0

if len(valid_vals) > 0:
sp_min_max = getattr(valid_vals, kind)()

# If a non-null fill value is currently present, it might be the min/max
if has_nonnull_fill_vals:
func = max if kind == "max" else min
return func(sp_min_max, self.fill_value)
else:
elif skipna:
return sp_min_max
elif self.sp_index.ngaps == 0:
# No NAs present
return sp_min_max
else:
return na_value_for_dtype(self.dtype.subtype, compat=False)
elif has_nonnull_fill_vals:
return self.fill_value
else:
return na_value_for_dtype(self.dtype.subtype)
return na_value_for_dtype(self.dtype.subtype, compat=False)

# ------------------------------------------------------------------------
# Ufuncs
Expand Down
29 changes: 25 additions & 4 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
@@ -1387,11 +1387,21 @@ class TestMinMax:
],
)
def test_nan_fill_value(self, raw_data, max_expected, min_expected):
max_result = SparseArray(raw_data).max()
min_result = SparseArray(raw_data).min()
arr = SparseArray(raw_data)
max_result = arr.max()
min_result = arr.min()
assert max_result in max_expected
assert min_result in min_expected

max_result = arr.max(skipna=False)
min_result = arr.min(skipna=False)
if np.isnan(raw_data).any():
assert np.isnan(max_result)
assert np.isnan(min_result)
else:
assert max_result in max_expected
assert min_result in min_expected

@pytest.mark.parametrize(
"fill_value,max_expected,min_expected",
[
@@ -1409,6 +1419,16 @@ def test_fill_value(self, fill_value, max_expected, min_expected):
min_result = arr.min()
assert min_result == min_expected

def test_only_fill_value(self):
fv = 100
arr = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv))
assert len(arr._valid_sp_values) == 0

assert arr.max() == fv
assert arr.min() == fv
assert arr.max(skipna=False) == fv
assert arr.min(skipna=False) == fv

@pytest.mark.parametrize("func", ["min", "max"])
@pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
@pytest.mark.parametrize(
@@ -1423,7 +1443,8 @@ def test_fill_value(self, fill_value, max_expected, min_expected):
def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
arr = SparseArray(data, dtype=dtype)
result = getattr(arr, func)()
if expected == pd.NaT:
assert result == pd.NaT
if expected is pd.NaT:
# TODO: pin down whether we wrap datetime64("NaT")
assert result is pd.NaT or np.isnat(result)
else:
assert np.isnan(result)