Skip to content

Commit a48f451

Browse files
authored
BUG: SparseArray.min/max skipna (#44829)
1 parent 246c747 commit a48f451

File tree

2 files changed

+47
-19
lines changed

2 files changed

+47
-19
lines changed

pandas/core/arrays/sparse/array.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,69 +1491,76 @@ def mean(self, axis=0, *args, **kwargs):
14911491
nsparse = self.sp_index.ngaps
14921492
return (sp_sum + self.fill_value * nsparse) / (ct + nsparse)
14931493

1494-
def max(self, axis: int = 0, *args, **kwargs) -> Scalar:
1494+
def max(self, *, axis: int | None = None, skipna: bool = True):
14951495
"""
1496-
Max of non-NA/null values
1496+
Max of array values, ignoring NA values if specified.
14971497
14981498
Parameters
14991499
----------
15001500
axis : int or None, default None
15011501
Not Used. NumPy compatibility.
1502-
*args, **kwargs
1503-
Not Used. NumPy compatibility.
1502+
skipna : bool, default True
1503+
Whether to ignore NA values.
15041504
15051505
Returns
15061506
-------
15071507
scalar
15081508
"""
1509-
nv.validate_max(args, kwargs)
1510-
return self._min_max("max")
1509+
nv.validate_minmax_axis(axis, self.ndim)
1510+
return self._min_max("max", skipna=skipna)
15111511

1512-
def min(self, axis: int = 0, *args, **kwargs) -> Scalar:
1512+
def min(self, *, axis: int | None = None, skipna: bool = True):
15131513
"""
1514-
Min of non-NA/null values
1514+
Min of array values, ignoring NA values if specified.
15151515
15161516
Parameters
15171517
----------
15181518
axis : int or None, default None
15191519
Not Used. NumPy compatibility.
1520-
*args, **kwargs
1521-
Not Used. NumPy compatibility.
1520+
skipna : bool, default True
1521+
Whether to ignore NA values.
15221522
15231523
Returns
15241524
-------
15251525
scalar
15261526
"""
1527-
nv.validate_min(args, kwargs)
1528-
return self._min_max("min")
1527+
nv.validate_minmax_axis(axis, self.ndim)
1528+
return self._min_max("min", skipna=skipna)
15291529

1530-
def _min_max(self, kind: Literal["min", "max"]) -> Scalar:
1530+
def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar:
15311531
"""
15321532
Min/max of array values, ignoring NA values if skipna is True.
15331533
15341534
Parameters
15351535
----------
15361536
kind : {"min", "max"}
1537+
skipna : bool
15371538
15381539
Returns
15391540
-------
15401541
scalar
15411542
"""
15421543
valid_vals = self._valid_sp_values
15431544
has_nonnull_fill_vals = not self._null_fill_value and self.sp_index.ngaps > 0
1545+
15441546
if len(valid_vals) > 0:
15451547
sp_min_max = getattr(valid_vals, kind)()
15461548

15471549
# If a non-null fill value is currently present, it might be the min/max
15481550
if has_nonnull_fill_vals:
15491551
func = max if kind == "max" else min
15501552
return func(sp_min_max, self.fill_value)
1551-
else:
1553+
elif skipna:
1554+
return sp_min_max
1555+
elif self.sp_index.ngaps == 0:
1556+
# No NAs present
15521557
return sp_min_max
1558+
else:
1559+
return na_value_for_dtype(self.dtype.subtype, compat=False)
15531560
elif has_nonnull_fill_vals:
15541561
return self.fill_value
15551562
else:
1556-
return na_value_for_dtype(self.dtype.subtype)
1563+
return na_value_for_dtype(self.dtype.subtype, compat=False)
15571564

15581565
# ------------------------------------------------------------------------
15591566
# Ufuncs

pandas/tests/arrays/sparse/test_array.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,11 +1387,21 @@ class TestMinMax:
13871387
],
13881388
)
13891389
def test_nan_fill_value(self, raw_data, max_expected, min_expected):
1390-
max_result = SparseArray(raw_data).max()
1391-
min_result = SparseArray(raw_data).min()
1390+
arr = SparseArray(raw_data)
1391+
max_result = arr.max()
1392+
min_result = arr.min()
13921393
assert max_result in max_expected
13931394
assert min_result in min_expected
13941395

1396+
max_result = arr.max(skipna=False)
1397+
min_result = arr.min(skipna=False)
1398+
if np.isnan(raw_data).any():
1399+
assert np.isnan(max_result)
1400+
assert np.isnan(min_result)
1401+
else:
1402+
assert max_result in max_expected
1403+
assert min_result in min_expected
1404+
13951405
@pytest.mark.parametrize(
13961406
"fill_value,max_expected,min_expected",
13971407
[
@@ -1409,6 +1419,16 @@ def test_fill_value(self, fill_value, max_expected, min_expected):
14091419
min_result = arr.min()
14101420
assert min_result == min_expected
14111421

1422+
def test_only_fill_value(self):
1423+
fv = 100
1424+
arr = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv))
1425+
assert len(arr._valid_sp_values) == 0
1426+
1427+
assert arr.max() == fv
1428+
assert arr.min() == fv
1429+
assert arr.max(skipna=False) == fv
1430+
assert arr.min(skipna=False) == fv
1431+
14121432
@pytest.mark.parametrize("func", ["min", "max"])
14131433
@pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
14141434
@pytest.mark.parametrize(
@@ -1423,7 +1443,8 @@ def test_fill_value(self, fill_value, max_expected, min_expected):
14231443
def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
14241444
arr = SparseArray(data, dtype=dtype)
14251445
result = getattr(arr, func)()
1426-
if expected == pd.NaT:
1427-
assert result == pd.NaT
1446+
if expected is pd.NaT:
1447+
# TODO: pin down whether we wrap datetime64("NaT")
1448+
assert result is pd.NaT or np.isnat(result)
14281449
else:
14291450
assert np.isnan(result)

0 commit comments

Comments
 (0)