diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 7b10a0f39bdbd..6fd84bf29e9c5 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -123,6 +123,7 @@ Other enhancements - Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`) - Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`) - Attempting to write into a file in missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions missing parent directory, the same is true for :class:`Series` counterparts (:issue:`24306`) +- :meth:`IntegerArray.all`, :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`) - Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`) - diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 9769183700f27..66fc840f9821d 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -21,7 +21,6 @@ npt, type_t, ) -from pandas.compat.numpy import function as nv from pandas.core.dtypes.common import ( is_bool_dtype, @@ -310,6 +309,9 @@ class BooleanArray(BaseMaskedArray): # The value used to fill '_data' to avoid upcasting _internal_fill_value = False + # Fill values used for any/all + _truthy_value = True + _falsey_value = False _TRUE_VALUES = {"True", 
"TRUE", "true", "1", "1.0"} _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} @@ -490,141 +492,6 @@ def _values_for_argsort(self) -> np.ndarray: data[self._mask] = -1 return data - def any(self, *, skipna: bool = True, **kwargs): - """ - Return whether any element is True. - - Returns False unless there is at least one element that is True. - By default, NAs are skipped. If ``skipna=False`` is specified and - missing values are present, similar :ref:`Kleene logic <boolean.kleene>` - is used as for logical operations. - - Parameters - ---------- - skipna : bool, default True - Exclude NA values. If the entire array is NA and `skipna` is - True, then the result will be False, as for an empty array. - If `skipna` is False, the result will still be True if there is - at least one element that is True, otherwise NA will be returned - if there are NA's present. - **kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - bool or :attr:`pandas.NA` - - See Also - -------- - numpy.any : Numpy version of this method. - BooleanArray.all : Return whether all elements are True. 
- - Examples - -------- - The result indicates whether any element is True (and by default - skips NAs): - - >>> pd.array([True, False, True]).any() - True - >>> pd.array([True, False, pd.NA]).any() - True - >>> pd.array([False, False, pd.NA]).any() - False - >>> pd.array([], dtype="boolean").any() - False - >>> pd.array([pd.NA], dtype="boolean").any() - False - - With ``skipna=False``, the result can be NA if this is logically - required (whether ``pd.NA`` is True or False influences the result): - - >>> pd.array([True, False, pd.NA]).any(skipna=False) - True - >>> pd.array([False, False, pd.NA]).any(skipna=False) - <NA> - """ - kwargs.pop("axis", None) - nv.validate_any((), kwargs) - - values = self._data.copy() - np.putmask(values, self._mask, False) - result = values.any() - if skipna: - return result - else: - if result or len(self) == 0 or not self._mask.any(): - return result - else: - return self.dtype.na_value - - def all(self, *, skipna: bool = True, **kwargs): - """ - Return whether all elements are True. - - Returns True unless there is at least one element that is False. - By default, NAs are skipped. If ``skipna=False`` is specified and - missing values are present, similar :ref:`Kleene logic <boolean.kleene>` - is used as for logical operations. - - Parameters - ---------- - skipna : bool, default True - Exclude NA values. If the entire array is NA and `skipna` is - True, then the result will be True, as for an empty array. - If `skipna` is False, the result will still be False if there is - at least one element that is False, otherwise NA will be returned - if there are NA's present. - **kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - bool or :attr:`pandas.NA` - - See Also - -------- - numpy.all : Numpy version of this method. - BooleanArray.any : Return whether any element is True. 
- - Examples - -------- - The result indicates whether any element is True (and by default - skips NAs): - - >>> pd.array([True, True, pd.NA]).all() - True - >>> pd.array([True, False, pd.NA]).all() - False - >>> pd.array([], dtype="boolean").all() - True - >>> pd.array([pd.NA], dtype="boolean").all() - True - - With ``skipna=False``, the result can be NA if this is logically - required (whether ``pd.NA`` is True or False influences the result): - - >>> pd.array([True, True, pd.NA]).all(skipna=False) - <NA> - >>> pd.array([True, False, pd.NA]).all(skipna=False) - False - """ - kwargs.pop("axis", None) - nv.validate_all((), kwargs) - - values = self._data.copy() - np.putmask(values, self._mask, True) - result = values.all() - - if skipna: - return result - else: - if not result or len(self) == 0 or not self._mask.any(): - return result - else: - return self.dtype.na_value - def _logical_method(self, other, op): assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} @@ -753,13 +620,6 @@ def _arith_method(self, other, op): return self._maybe_mask_result(result, mask, other, op_name) - def _reduce(self, name: str, *, skipna: bool = True, **kwargs): - - if name in {"any", "all"}: - return getattr(self, name)(skipna=skipna, **kwargs) - - return super()._reduce(name, skipna=skipna, **kwargs) - def _maybe_mask_result(self, result, mask, other, op_name: str): """ Parameters diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 25b4076bd23c6..066f6ebdfcaa6 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -245,6 +245,9 @@ class FloatingArray(NumericArray): # The value used to fill '_data' to avoid upcasting _internal_fill_value = 0.0 + # Fill values used for any/all + _truthy_value = 1.0 + _falsey_value = 0.0 @cache_readonly def dtype(self) -> FloatingDtype: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e62a2f95b0340..078adeb11d3fb 100644 --- 
a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -307,6 +307,9 @@ class IntegerArray(NumericArray): # The value used to fill '_data' to avoid upcasting _internal_fill_value = 1 + # Fill values used for any/all + _truthy_value = 1 + _falsey_value = 0 @cache_readonly def dtype(self) -> _IntegerDtype: diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index e40d8b74c768c..23b9eeff03d4c 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -64,7 +64,7 @@ if TYPE_CHECKING: from pandas import Series from pandas.core.arrays import BooleanArray - +from pandas.compat.numpy import function as nv BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray") @@ -115,6 +115,9 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): # The value used to fill '_data' to avoid upcasting _internal_fill_value: Scalar + # Fill values used for any/all + _truthy_value: Scalar # bool(_truthy_value) = True + _falsey_value: Scalar # bool(_falsey_value) = False def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): # values is supposed to already be validated in the subclass @@ -518,6 +521,9 @@ def value_counts(self, dropna: bool = True) -> Series: return Series(counts, index=index) def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + if name in {"any", "all"}: + return getattr(self, name)(skipna=skipna, **kwargs) + data = self._data mask = self._mask @@ -537,3 +543,156 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): return libmissing.NA return result + + def any(self, *, skipna: bool = True, **kwargs): + """ + Return whether any element is truthy. + + Returns False unless there is at least one element that is truthy. + By default, NAs are skipped. If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic <boolean.kleene>` + is used as for logical operations. + + .. 
versionchanged:: 1.4.0 + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be False, as for an empty array. + If `skipna` is False, the result will still be True if there is + at least one element that is truthy, otherwise NA will be returned + if there are NA's present. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.any : Numpy version of this method. + BaseMaskedArray.all : Return whether all elements are truthy. + + Examples + -------- + The result indicates whether any element is truthy (and by default + skips NAs): + + >>> pd.array([True, False, True]).any() + True + >>> pd.array([True, False, pd.NA]).any() + True + >>> pd.array([False, False, pd.NA]).any() + False + >>> pd.array([], dtype="boolean").any() + False + >>> pd.array([pd.NA], dtype="boolean").any() + False + >>> pd.array([pd.NA], dtype="Float64").any() + False + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, False, pd.NA]).any(skipna=False) + True + >>> pd.array([1, 0, pd.NA]).any(skipna=False) + True + >>> pd.array([False, False, pd.NA]).any(skipna=False) + <NA> + >>> pd.array([0, 0, pd.NA]).any(skipna=False) + <NA> + """ + kwargs.pop("axis", None) + nv.validate_any((), kwargs) + + values = self._data.copy() + np.putmask(values, self._mask, self._falsey_value) + result = values.any() + if skipna: + return result + else: + if result or len(self) == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value + + def all(self, *, skipna: bool = True, **kwargs): + """ + Return whether all elements are truthy. + + Returns True unless there is at least one element that is falsey. + By default, NAs are skipped. 
If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic <boolean.kleene>` + is used as for logical operations. + + .. versionchanged:: 1.4.0 + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be True, as for an empty array. + If `skipna` is False, the result will still be False if there is + at least one element that is falsey, otherwise NA will be returned + if there are NA's present. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.all : Numpy version of this method. + BooleanArray.any : Return whether any element is truthy. + + Examples + -------- + The result indicates whether all elements are truthy (and by default + skips NAs): + + >>> pd.array([True, True, pd.NA]).all() + True + >>> pd.array([1, 1, pd.NA]).all() + True + >>> pd.array([True, False, pd.NA]).all() + False + >>> pd.array([], dtype="boolean").all() + True + >>> pd.array([pd.NA], dtype="boolean").all() + True + >>> pd.array([pd.NA], dtype="Float64").all() + True + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, True, pd.NA]).all(skipna=False) + <NA> + >>> pd.array([1, 1, pd.NA]).all(skipna=False) + <NA> + >>> pd.array([True, False, pd.NA]).all(skipna=False) + False + >>> pd.array([1, 0, pd.NA]).all(skipna=False) + False + """ + kwargs.pop("axis", None) + nv.validate_all((), kwargs) + + values = self._data.copy() + np.putmask(values, self._mask, self._truthy_value) + result = values.all() + + if skipna: + return result + else: + if not result or len(self) == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value diff --git a/pandas/tests/extension/test_boolean.py 
b/pandas/tests/extension/test_boolean.py index 395540993dc15..2ee26e139f3a6 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -381,6 +381,7 @@ def check_reduce(self, s, op_name, skipna): tm.assert_almost_equal(result, expected) +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") class TestBooleanReduce(base.BaseBooleanReduceTests): pass diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index 617dfc694741e..f4d3243b5129f 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -211,6 +211,7 @@ def check_reduce(self, s, op_name, skipna): tm.assert_almost_equal(result, expected) +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") class TestBooleanReduce(base.BaseBooleanReduceTests): pass diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 2305edc1e1327..2cf4f8e415770 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -243,6 +243,7 @@ def check_reduce(self, s, op_name, skipna): tm.assert_almost_equal(result, expected) +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") class TestBooleanReduce(base.BaseBooleanReduceTests): pass diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 513b9af18d2b6..62aae33134f60 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -964,6 +964,7 @@ def test_any_all_object_dtype_missing(self, data, bool_agg_func): expected = bool_agg_func == "any" and None not in data assert result == expected + @pytest.mark.parametrize("dtype", ["boolean", "Int64", "UInt64", "Float64"]) @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize( @@ -971,18 +972,19 @@ def 
test_any_all_object_dtype_missing(self, data, bool_agg_func): # [skipna=True/any, skipna=True/all]] "data,expected_data", [ - ([False, False, False], [[False, False], [False, False]]), - ([True, True, True], [[True, True], [True, True]]), + ([0, 0, 0], [[False, False], [False, False]]), + ([1, 1, 1], [[True, True], [True, True]]), ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]), - ([False, pd.NA, False], [[pd.NA, False], [False, False]]), - ([True, pd.NA, True], [[True, pd.NA], [True, True]]), - ([True, pd.NA, False], [[True, False], [True, False]]), + ([0, pd.NA, 0], [[pd.NA, False], [False, False]]), + ([1, pd.NA, 1], [[True, pd.NA], [True, True]]), + ([1, pd.NA, 0], [[True, False], [True, False]]), ], ) - def test_any_all_boolean_kleene_logic( - self, bool_agg_func, skipna, data, expected_data + def test_any_all_nullable_kleene_logic( + self, bool_agg_func, skipna, data, dtype, expected_data ): - ser = Series(data, dtype="boolean") + # GH-37506, GH-41967 + ser = Series(data, dtype=dtype) expected = expected_data[skipna][bool_agg_func == "all"] result = getattr(ser, bool_agg_func)(skipna=skipna)