From 0b849d8184e0f7dbdfec33720db20d0f2c48d517 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 16 Mar 2021 20:30:47 -0700 Subject: [PATCH] BUG: frame.any/all with bool_only=True and Categorical[bool] --- pandas/core/dtypes/inference.py | 26 ++++++++++++++++++++++++++ pandas/core/internals/array_manager.py | 7 ++----- pandas/core/internals/blocks.py | 22 +++++++++------------- pandas/tests/frame/test_reductions.py | 3 +++ 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 97bffb35c28d9..58da2570015b5 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -8,6 +8,7 @@ import numpy as np from pandas._libs import lib +from pandas._typing import ArrayLike is_bool = lib.is_bool @@ -420,3 +421,28 @@ def is_dataclass(item): return is_dataclass(item) and not isinstance(item, type) except ImportError: return False + + +def is_inferred_bool_dtype(arr: ArrayLike) -> bool: + """ + Check if this is a ndarray[bool] or an ndarray[object] of bool objects. + + Parameters + ---------- + arr : np.ndarray or ExtensionArray + + Returns + ------- + bool + + Notes + ----- + This does not include the special treatment is_bool_dtype uses for + Categorical. + """ + dtype = arr.dtype + if dtype == np.dtype(bool): + return True + elif dtype == np.dtype("object"): + return lib.is_bool_array(arr.ravel("K")) + return False diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 2177839eb34ce..abb4121aca7cf 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -33,7 +33,6 @@ ) from pandas.core.dtypes.common import ( ensure_int64, - is_bool_dtype, is_datetime64_ns_dtype, is_dtype_equal, is_extension_array_dtype, @@ -50,6 +49,7 @@ ABCPandasArray, ABCSeries, ) +from pandas.core.dtypes.inference import is_inferred_bool_dtype from pandas.core.dtypes.missing import ( array_equals, isna, @@ -670,10 +670,7 @@ def get_bool_data(self, copy: bool = False) -> ArrayManager: copy : bool, default False Whether to copy the blocks """ - return self._get_data_subset( - lambda arr: is_bool_dtype(arr.dtype) - or (is_object_dtype(arr.dtype) and lib.is_bool_array(arr)) - ) + return self._get_data_subset(is_inferred_bool_dtype) def get_numeric_data(self, copy: bool = False) -> ArrayManager: """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3fd1ebaca19f0..c14faefe53396 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -68,6 +68,7 @@ ABCPandasArray, ABCSeries, ) +from pandas.core.dtypes.inference import is_inferred_bool_dtype from pandas.core.dtypes.missing import ( is_valid_na_for_dtype, isna, @@ -153,7 +154,6 @@ class Block(PandasObject): __slots__ = ["_mgr_locs", "values", "ndim"] is_numeric = False - is_bool = False is_object = False is_extension = False _can_hold_na = False @@ -216,6 +216,14 @@ def is_view(self) -> bool: def is_categorical(self) -> bool: return self._holder is Categorical + @final + @property + def is_bool(self) -> bool: + """ + We can be bool if a) we are bool dtype or b) object dtype with bool objects. + """ + return is_inferred_bool_dtype(self.values) + def external_values(self): """ The array that Series.values returns (public attribute). @@ -1800,10 +1808,6 @@ def _can_hold_element(self, element: Any) -> bool: def _can_hold_na(self): return self.dtype.kind not in ["b", "i", "u"] - @property - def is_bool(self): - return self.dtype.kind == "b" - class FloatBlock(NumericBlock): __slots__ = () @@ -2031,14 +2035,6 @@ class ObjectBlock(Block): values: np.ndarray - @property - def is_bool(self): - """ - we can be a bool if we have only bool values but are of type - object - """ - return lib.is_bool_array(self.values.ravel("K")) - @maybe_split def reduce(self, func, ignore_failures: bool = False) -> List[Block]: """ diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index d24320ad17709..672ab20fb9791 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1163,6 +1163,9 @@ def test_any_all_object_bool_only(self): df._consolidate_inplace() df["C"] = Series([True, True]) + # Categorical of bools is _not_ considered booly + df["D"] = df["C"].astype("category") + # The underlying bug is in DataFrame._get_bool_data, so we check # that while we're here res = df._get_bool_data()