From 9ce3a3ed63d36774cf251512ba2bab78aca222d1 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Dec 2021 08:30:56 -0800 Subject: [PATCH] BUG: SparseArray.min/max skipna --- pandas/core/arrays/sparse/array.py | 37 ++++++++++++++---------- pandas/tests/arrays/sparse/test_array.py | 29 ++++++++++++++++--- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 128ca6643eceb..a5fbf53a96592 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1491,49 +1491,50 @@ def mean(self, axis=0, *args, **kwargs): nsparse = self.sp_index.ngaps return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) - def max(self, axis: int = 0, *args, **kwargs) -> Scalar: + def max(self, *, axis: int | None = None, skipna: bool = True): """ - Max of non-NA/null values + Max of array values, ignoring NA values if specified. Parameters ---------- axis : int, default 0 Not Used. NumPy compatibility. - *args, **kwargs - Not Used. NumPy compatibility. + skipna : bool, default True + Whether to ignore NA values. Returns ------- scalar """ - nv.validate_max(args, kwargs) - return self._min_max("max") + nv.validate_minmax_axis(axis, self.ndim) + return self._min_max("max", skipna=skipna) - def min(self, axis: int = 0, *args, **kwargs) -> Scalar: + def min(self, *, axis: int | None = None, skipna: bool = True): """ - Min of non-NA/null values + Min of array values, ignoring NA values if specified. Parameters ---------- axis : int, default 0 Not Used. NumPy compatibility. - *args, **kwargs - Not Used. NumPy compatibility. + skipna : bool, default True + Whether to ignore NA values. Returns ------- scalar """ - nv.validate_min(args, kwargs) - return self._min_max("min") + nv.validate_minmax_axis(axis, self.ndim) + return self._min_max("min", skipna=skipna) - def _min_max(self, kind: Literal["min", "max"]) -> Scalar: + def _min_max(self, kind: Literal["min", "max"], skipna: bool) -> Scalar: """ Min/max of non-NA/null values Parameters ---------- kind : {"min", "max"} + skipna : bool Returns ------- @@ -1541,6 +1542,7 @@ def _min_max(self, kind: Literal["min", "max"]) -> Scalar: """ valid_vals = self._valid_sp_values has_nonnull_fill_vals = not self._null_fill_value and self.sp_index.ngaps > 0 + if len(valid_vals) > 0: sp_min_max = getattr(valid_vals, kind)() @@ -1548,12 +1550,17 @@ def _min_max(self, kind: Literal["min", "max"]) -> Scalar: if has_nonnull_fill_vals: func = max if kind == "max" else min return func(sp_min_max, self.fill_value) - else: + elif skipna: + return sp_min_max + elif self.sp_index.ngaps == 0: + # No NAs present return sp_min_max + else: + return na_value_for_dtype(self.dtype.subtype, compat=False) elif has_nonnull_fill_vals: return self.fill_value else: - return na_value_for_dtype(self.dtype.subtype) + return na_value_for_dtype(self.dtype.subtype, compat=False) # ------------------------------------------------------------------------ # Ufuncs diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index cc48918981338..d0e7b7d0a35fe 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1387,11 +1387,21 @@ class TestMinMax: ], ) def test_nan_fill_value(self, raw_data, max_expected, min_expected): - max_result = SparseArray(raw_data).max() - min_result = SparseArray(raw_data).min() + arr = SparseArray(raw_data) + max_result = arr.max() + min_result = arr.min() assert max_result in max_expected assert min_result in min_expected + max_result = arr.max(skipna=False) + min_result = arr.min(skipna=False) + if np.isnan(raw_data).any(): + assert np.isnan(max_result) + assert np.isnan(min_result) + else: + assert max_result in max_expected + assert min_result in min_expected + @pytest.mark.parametrize( "fill_value,max_expected,min_expected", [ @@ -1409,6 +1419,16 @@ def test_fill_value(self, fill_value, max_expected, min_expected): min_result = arr.min() assert min_result == min_expected + def test_only_fill_value(self): + fv = 100 + arr = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv)) + assert len(arr._valid_sp_values) == 0 + + assert arr.max() == fv + assert arr.min() == fv + assert arr.max(skipna=False) == fv + assert arr.min(skipna=False) == fv + @pytest.mark.parametrize("func", ["min", "max"]) @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])]) @pytest.mark.parametrize( @@ -1423,7 +1443,8 @@ def test_fill_value(self, fill_value, max_expected, min_expected): def test_na_value_if_no_valid_values(self, func, data, dtype, expected): arr = SparseArray(data, dtype=dtype) result = getattr(arr, func)() - if expected == pd.NaT: - assert result == pd.NaT + if expected is pd.NaT: + # TODO: pin down whether we wrap datetime64("NaT") + assert result is pd.NaT or np.isnat(result) else: assert np.isnan(result)