From 233bc2d589e9c6dc2a856c644b8d6959cb8e4e79 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 1 Oct 2020 10:16:45 -0500 Subject: [PATCH 1/5] ENH: Implement FloatingArray reductions --- pandas/conftest.py | 11 ++++++++ pandas/core/arrays/floating.py | 17 ++++++++++--- pandas/tests/arrays/floating/test_function.py | 25 +++++++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 3865d287c6905..a423a31bd5950 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1063,6 +1063,17 @@ def any_nullable_int_dtype(request): return request.param +@pytest.fixture(params=tm.FLOAT_EA_DTYPES) +def any_nullable_float_dtype(request): + """ + Parameterized fixture for any nullable integer dtype. + + * 'Float32' + * 'Float64' + """ + return request.param + + @pytest.fixture(params=tm.SIGNED_EA_INT_DTYPES) def any_signed_nullable_int_dtype(request): """ diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index c3710196a8611..6843d6c5be73a 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -475,10 +475,19 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs): def sum(self, skipna=True, min_count=0, **kwargs): nv.validate_sum((), kwargs) - result = masked_reductions.sum( - values=self._data, mask=self._mask, skipna=skipna, min_count=min_count - ) - return result + return self._reduce("sum", skipna=skipna, min_count=min_count) + + def prod(self, skipna=True, min_count=0, **kwargs): + nv.validate_prod((), kwargs) + return self._reduce("prod", skipna=skipna, min_count=min_count) + + def min(self, skipna=True, **kwargs): + nv.validate_min((), kwargs) + return self._reduce("min", skipna=skipna) + + def max(self, skipna=True, **kwargs): + nv.validate_max((), kwargs) + return self._reduce("max", skipna=skipna) def _maybe_mask_result(self, result, mask, other, op_name: str): """ diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index 84c650f880541..3dacfc3a342ab 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -152,3 +152,28 @@ def test_preserve_dtypes(op): index=pd.Index(["a", "b"], name="A"), ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("method", ["min", "max"]) +def test_floating_array_min_max(skipna, method, any_nullable_float_dtype): + dtype = any_nullable_float_dtype + arr = pd.array([0.0, 1.0, None], dtype=dtype) + func = getattr(arr, method) + result = func(skipna=skipna) + if skipna: + assert result == (0 if method == "min" else 1) + else: + assert result is pd.NA + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("min_count", [0, 9]) +def test_integer_array_prod(skipna, min_count, any_nullable_float_dtype): + dtype = any_nullable_float_dtype + arr = pd.array([1.0, 2.0, None], dtype=dtype) + result = arr.prod(skipna=skipna, min_count=min_count) + if skipna and min_count == 0: + assert result == 2 + else: + assert result is pd.NA From 775ecaab7235514b885933f1acdd583f712d62cd Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 1 Oct 2020 10:27:03 -0500 Subject: [PATCH 2/5] Fix --- pandas/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index a423a31bd5950..e78a23359bd7f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1066,7 +1066,7 @@ def any_nullable_int_dtype(request): @pytest.fixture(params=tm.FLOAT_EA_DTYPES) def any_nullable_float_dtype(request): """ - Parameterized fixture for any nullable integer dtype. + Parameterized fixture for any nullable float dtype. * 'Float32' * 'Float64' From fae43b225391bdc659ef8670707a585e0cc24919 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 1 Oct 2020 10:27:51 -0500 Subject: [PATCH 3/5] Fix --- pandas/tests/arrays/floating/test_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index 3dacfc3a342ab..1b4d402212c32 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -169,7 +169,7 @@ def test_floating_array_min_max(skipna, method, any_nullable_float_dtype): @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("min_count", [0, 9]) -def test_integer_array_prod(skipna, min_count, any_nullable_float_dtype): +def test_floating_array_prod(skipna, min_count, any_nullable_float_dtype): dtype = any_nullable_float_dtype arr = pd.array([1.0, 2.0, None], dtype=dtype) result = arr.prod(skipna=skipna, min_count=min_count) From 8b4faa046035f2fc7f3f22483544a83e6dfcfe89 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 1 Oct 2020 14:01:55 -0500 Subject: [PATCH 4/5] dtype --- pandas/conftest.py | 11 ----------- pandas/tests/arrays/floating/test_function.py | 10 ++++------ 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index e78a23359bd7f..3865d287c6905 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1063,17 +1063,6 @@ def any_nullable_int_dtype(request): return request.param -@pytest.fixture(params=tm.FLOAT_EA_DTYPES) -def any_nullable_float_dtype(request): - """ - Parameterized fixture for any nullable float dtype. - - * 'Float32' - * 'Float64' - """ - return request.param - - @pytest.fixture(params=tm.SIGNED_EA_INT_DTYPES) def any_signed_nullable_int_dtype(request): """ diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index 1b4d402212c32..2767d93741d4c 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -112,8 +112,8 @@ def test_value_counts_empty(): @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("min_count", [0, 4]) -def test_floating_array_sum(skipna, min_count): - arr = pd.array([1, 2, 3, None], dtype="Float64") +def test_floating_array_sum(skipna, min_count, dtype): + arr = pd.array([1, 2, 3, None], dtype=dtype) result = arr.sum(skipna=skipna, min_count=min_count) if skipna and min_count == 0: assert result == 6.0 @@ -156,8 +156,7 @@ def test_preserve_dtypes(op): @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("method", ["min", "max"]) -def test_floating_array_min_max(skipna, method, any_nullable_float_dtype): - dtype = any_nullable_float_dtype +def test_floating_array_min_max(skipna, method, dtype): arr = pd.array([0.0, 1.0, None], dtype=dtype) func = getattr(arr, method) result = func(skipna=skipna) @@ -169,8 +168,7 @@ def test_floating_array_min_max(skipna, method, any_nullable_float_dtype): @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("min_count", [0, 9]) -def test_floating_array_prod(skipna, min_count, any_nullable_float_dtype): - dtype = any_nullable_float_dtype +def test_floating_array_prod(skipna, min_count, dtype): arr = pd.array([1.0, 2.0, None], dtype=dtype) result = arr.prod(skipna=skipna, min_count=min_count) if skipna and min_count == 0: From e465a55cd2c8b2a34ec6a74b630ec12fca776246 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 2 Oct 2020 16:52:57 -0500 Subject: [PATCH 5/5] Use BaseMaskedArray --- pandas/core/arrays/floating.py | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 6843d6c5be73a..a230760ca1abe 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -25,8 +25,7 @@ from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.missing import isna -from pandas.core import nanops, ops -from pandas.core.array_algos import masked_reductions +from pandas.core import ops from pandas.core.ops import invalid_comparison from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.tools.numeric import to_numeric @@ -452,42 +451,21 @@ def cmp_method(self, other): name = f"__{op.__name__}__" return set_function_name(cmp_method, name, cls) - def _reduce(self, name: str, skipna: bool = True, **kwargs): - data = self._data - mask = self._mask - - if name in {"sum", "prod", "min", "max"}: - op = getattr(masked_reductions, name) - return op(data, mask, skipna=skipna, **kwargs) - - # coerce to a nan-aware float if needed - # (we explicitly use NaN within reductions) - if self._hasna: - data = self.to_numpy("float64", na_value=np.nan) - - op = getattr(nanops, "nan" + name) - result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) - - if np.isnan(result): - return libmissing.NA - - return result - def sum(self, skipna=True, min_count=0, **kwargs): nv.validate_sum((), kwargs) - return self._reduce("sum", skipna=skipna, min_count=min_count) + return super()._reduce("sum", skipna=skipna, min_count=min_count) def prod(self, skipna=True, min_count=0, **kwargs): nv.validate_prod((), kwargs) - return self._reduce("prod", skipna=skipna, min_count=min_count) + return super()._reduce("prod", skipna=skipna, min_count=min_count) def min(self, skipna=True, **kwargs): nv.validate_min((), kwargs) - return self._reduce("min", skipna=skipna) + return super()._reduce("min", skipna=skipna) def max(self, skipna=True, **kwargs): nv.validate_max((), kwargs) - return self._reduce("max", skipna=skipna) + return super()._reduce("max", skipna=skipna) def _maybe_mask_result(self, result, mask, other, op_name: str): """