Skip to content

Commit f1bb3b2

Browse files
authored
DEPR: object-dtype bool_only (#49371)
1 parent 0a5cb8f commit f1bb3b2

File tree

6 files changed

+17
-70
lines changed

6 files changed

+17
-70
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,8 @@ Removal of prior version deprecations/changes
258258
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
259259
- Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
260260
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
261+
- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values are no longer included. To include such columns, explicitly cast them to ``bool`` dtype first (:issue:`46188`)
262+
-
261263

262264
.. ---------------------------------------------------------------------------
263265
.. _whatsnew_200.performance:

pandas/core/dtypes/inference.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,10 @@
66
from numbers import Number
77
import re
88
from typing import Pattern
9-
import warnings
109

1110
import numpy as np
1211

1312
from pandas._libs import lib
14-
from pandas._typing import ArrayLike
15-
from pandas.util._exceptions import find_stack_level
1613

1714
is_bool = lib.is_bool
1815

@@ -425,42 +422,3 @@ def is_dataclass(item):
425422
return is_dataclass(item) and not isinstance(item, type)
426423
except ImportError:
427424
return False
428-
429-
430-
def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
431-
"""
432-
Check if this is a ndarray[bool] or an ndarray[object] of bool objects.
433-
434-
Parameters
435-
----------
436-
arr : np.ndarray or ExtensionArray
437-
438-
Returns
439-
-------
440-
bool
441-
442-
Notes
443-
-----
444-
This does not include the special treatment is_bool_dtype uses for
445-
Categorical.
446-
"""
447-
if not isinstance(arr, np.ndarray):
448-
return False
449-
450-
dtype = arr.dtype
451-
if dtype == np.dtype(bool):
452-
return True
453-
elif dtype == np.dtype("object"):
454-
result = lib.is_bool_array(arr)
455-
if result:
456-
# GH#46188
457-
warnings.warn(
458-
"In a future version, object-dtype columns with all-bool values "
459-
"will not be included in reductions with bool_only=True. "
460-
"Explicitly cast to bool dtype instead.",
461-
FutureWarning,
462-
stacklevel=find_stack_level(),
463-
)
464-
return result
465-
466-
return False

pandas/core/internals/array_manager.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@
5252
ABCDataFrame,
5353
ABCSeries,
5454
)
55-
from pandas.core.dtypes.inference import is_inferred_bool_dtype
5655
from pandas.core.dtypes.missing import (
5756
array_equals,
5857
isna,
@@ -488,7 +487,7 @@ def get_bool_data(self: T, copy: bool = False) -> T:
488487
copy : bool, default False
489488
Whether to copy the blocks
490489
"""
491-
return self._get_data_subset(is_inferred_bool_dtype)
490+
return self._get_data_subset(lambda x: x.dtype == np.dtype(bool))
492491

493492
def get_numeric_data(self: T, copy: bool = False) -> T:
494493
"""

pandas/core/internals/blocks.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@
7070
ABCPandasArray,
7171
ABCSeries,
7272
)
73-
from pandas.core.dtypes.inference import is_inferred_bool_dtype
7473
from pandas.core.dtypes.missing import (
7574
is_valid_na_for_dtype,
7675
isna,
@@ -194,7 +193,7 @@ def is_bool(self) -> bool:
194193
"""
195194
We are bool only if our values have bool dtype; object dtype holding bool objects does not count.
196195
"""
197-
return is_inferred_bool_dtype(self.values)
196+
return self.values.dtype == np.dtype(bool)
198197

199198
@final
200199
def external_values(self):

pandas/tests/frame/test_reductions.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,7 +1280,6 @@ def test_any_all_object(self):
12801280
assert result is False
12811281

12821282
def test_any_all_object_bool_only(self):
1283-
msg = "object-dtype columns with all-bool values"
12841283

12851284
df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object)
12861285
df._consolidate_inplace()
@@ -1291,36 +1290,29 @@ def test_any_all_object_bool_only(self):
12911290

12921291
# The underlying bug is in DataFrame._get_bool_data, so we check
12931292
# that while we're here
1294-
with tm.assert_produces_warning(FutureWarning, match=msg):
1295-
res = df._get_bool_data()
1296-
expected = df[["B", "C"]]
1293+
res = df._get_bool_data()
1294+
expected = df[["C"]]
12971295
tm.assert_frame_equal(res, expected)
12981296

1299-
with tm.assert_produces_warning(FutureWarning, match=msg):
1300-
res = df.all(bool_only=True, axis=0)
1301-
expected = Series([False, True], index=["B", "C"])
1297+
res = df.all(bool_only=True, axis=0)
1298+
expected = Series([True], index=["C"])
13021299
tm.assert_series_equal(res, expected)
13031300

13041301
# operating on a subset of columns should not produce a _larger_ Series
1305-
with tm.assert_produces_warning(FutureWarning, match=msg):
1306-
res = df[["B", "C"]].all(bool_only=True, axis=0)
1302+
res = df[["B", "C"]].all(bool_only=True, axis=0)
13071303
tm.assert_series_equal(res, expected)
13081304

1309-
with tm.assert_produces_warning(FutureWarning, match=msg):
1310-
assert not df.all(bool_only=True, axis=None)
1305+
assert df.all(bool_only=True, axis=None)
13111306

1312-
with tm.assert_produces_warning(FutureWarning, match=msg):
1313-
res = df.any(bool_only=True, axis=0)
1314-
expected = Series([True, True], index=["B", "C"])
1307+
res = df.any(bool_only=True, axis=0)
1308+
expected = Series([True], index=["C"])
13151309
tm.assert_series_equal(res, expected)
13161310

13171311
# operating on a subset of columns should not produce a _larger_ Series
1318-
with tm.assert_produces_warning(FutureWarning, match=msg):
1319-
res = df[["B", "C"]].any(bool_only=True, axis=0)
1312+
res = df[["C"]].any(bool_only=True, axis=0)
13201313
tm.assert_series_equal(res, expected)
13211314

1322-
with tm.assert_produces_warning(FutureWarning, match=msg):
1323-
assert df.any(bool_only=True, axis=None)
1315+
assert df.any(bool_only=True, axis=None)
13241316

13251317
@pytest.mark.parametrize("method", ["any", "all"])
13261318
def test_any_all_level_axis_none_raises(self, method):

pandas/tests/internals/test_internals.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -795,17 +795,15 @@ def test_get_numeric_data(self, using_copy_on_write):
795795
)
796796

797797
def test_get_bool_data(self, using_copy_on_write):
798-
msg = "object-dtype columns with all-bool values"
799798
mgr = create_mgr(
800799
"int: int; float: float; complex: complex;"
801800
"str: object; bool: bool; obj: object; dt: datetime",
802801
item_shape=(3,),
803802
)
804803
mgr.iset(6, np.array([True, False, True], dtype=np.object_))
805804

806-
with tm.assert_produces_warning(FutureWarning, match=msg):
807-
bools = mgr.get_bool_data()
808-
tm.assert_index_equal(bools.items, Index(["bool", "dt"]))
805+
bools = mgr.get_bool_data()
806+
tm.assert_index_equal(bools.items, Index(["bool"]))
809807
tm.assert_almost_equal(
810808
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
811809
bools.iget(bools.items.get_loc("bool")).internal_values(),
@@ -824,8 +822,7 @@ def test_get_bool_data(self, using_copy_on_write):
824822
)
825823

826824
# Check sharing
827-
with tm.assert_produces_warning(FutureWarning, match=msg):
828-
bools2 = mgr.get_bool_data(copy=True)
825+
bools2 = mgr.get_bool_data(copy=True)
829826
bools2.iset(0, np.array([False, True, False]))
830827
if using_copy_on_write:
831828
tm.assert_numpy_array_equal(

0 commit comments

Comments
 (0)