From 0498023cee9ebbdb37e1623a3dbfb16b2d13974e Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 12 Dec 2020 11:34:31 -0500 Subject: [PATCH 1/5] REGR: Assigning label with registered EA dtype raises --- pandas/core/dtypes/common.py | 2 +- pandas/io/parsers.py | 11 +++-------- pandas/tests/dtypes/test_common.py | 2 ++ pandas/tests/frame/indexing/test_setitem.py | 13 +++++++++++++ 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index b4f6d587c6642..d8b0ad739b056 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1397,7 +1397,7 @@ def is_bool_dtype(arr_or_dtype) -> bool: # guess this return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean" elif is_extension_array_dtype(arr_or_dtype): - return getattr(arr_or_dtype, "dtype", arr_or_dtype)._is_boolean + return getattr(dtype, "_is_boolean", False) return issubclass(dtype.type, np.bool_) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5b623c360c3ef..7a366fdda9dfe 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1689,9 +1689,8 @@ def _convert_to_ndarrays( values, set(col_na_values) | col_na_fvalues, try_num_bool=False ) else: - is_str_or_ea_dtype = is_string_dtype( - cast_type - ) or is_extension_array_dtype(cast_type) + is_ea = is_extension_array_dtype(cast_type) + is_str_or_ea_dtype = is_string_dtype(cast_type) or is_ea # skip inference if specified dtype is object # or casting to an EA try_num_bool = not (cast_type and is_str_or_ea_dtype) @@ -1707,11 +1706,7 @@ def _convert_to_ndarrays( or is_extension_array_dtype(cast_type) ): try: - if ( - is_bool_dtype(cast_type) - and not is_categorical_dtype(cast_type) - and na_count > 0 - ): + if not is_ea and na_count > 0 and is_bool_dtype(cast_type): raise ValueError(f"Bool column has NA values in column {c}") except (AttributeError, TypeError): # invalid input to is_bool_dtype diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index ce6737db44195..19d80b714a674 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -545,6 +545,7 @@ def test_is_bool_dtype(): assert not com.is_bool_dtype(pd.Series([1, 2])) assert not com.is_bool_dtype(np.array(["a", "b"])) assert not com.is_bool_dtype(pd.Index(["a", "b"])) + assert not com.is_bool_dtype("Int64") assert com.is_bool_dtype(bool) assert com.is_bool_dtype(np.bool_) @@ -553,6 +554,7 @@ def test_is_bool_dtype(): assert com.is_bool_dtype(pd.BooleanDtype()) assert com.is_bool_dtype(pd.array([True, False, None], dtype="boolean")) + assert com.is_bool_dtype("boolean") @pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning") diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 884cb6c20b77e..12cf02960af0e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from pandas.core.dtypes.base import registry as ea_registry from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype from pandas import ( @@ -197,6 +198,18 @@ def test_setitem_extension_types(self, obj, dtype): tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize( + "ea_name", + # Don't test if name is a property + [dtype.name for dtype in ea_registry.dtypes if isinstance(dtype.name, str)], + ) + def test_setitem_with_ea_name(self, ea_name): + # GH 38386 + result = DataFrame([0]) + result[ea_name] = [1] + expected = DataFrame({0: [0], ea_name: [1]}) + tm.assert_frame_equal(result, expected) + def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self): # GH#7492 data_ns = np.array([1, "nat"], dtype="datetime64[ns]") From 74b2df92913e85100a3fced74cb9bbeff91e2b3c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 12 Dec 2020 12:32:51 -0500 Subject: [PATCH 2/5] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/frame/indexing/test_setitem.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index af9219bc25931..3eefd8c237d43 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -856,7 +856,7 @@ Other - Bug in :meth:`Index.drop` raising ``InvalidIndexError`` when index has duplicates (:issue:`38051`) - Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) - Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`) - +- Bug in :func:`.is_bool_dtype` would raise when passed a valid string such as ``"boolean"`` (:issue:`38386`). .. --------------------------------------------------------------------------- diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 12cf02960af0e..e7d922f308200 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -200,8 +200,12 @@ def test_setitem_extension_types(self, obj, dtype): @pytest.mark.parametrize( "ea_name", - # Don't test if name is a property - [dtype.name for dtype in ea_registry.dtypes if isinstance(dtype.name, str)], + # property would require instantiation + [ + dtype.name + for dtype in ea_registry.dtypes + if not isinstance(dtype.name, property) + ], ) def test_setitem_with_ea_name(self, ea_name): # GH 38386 From ed81c909fed15f6e9f67f46d7ecab5d0e7f162ec Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 13 Dec 2020 08:57:26 -0500 Subject: [PATCH 3/5] Added tests to test_setitem_with_ea_name --- pandas/tests/frame/indexing/test_setitem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index e7d922f308200..4a65debb5c84c 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -205,7 +205,8 @@ def test_setitem_extension_types(self, obj, dtype): dtype.name for dtype in ea_registry.dtypes if not isinstance(dtype.name, property) - ], + ] + + ["datetime64[ns, UTC]", "period[D]"], ) def test_setitem_with_ea_name(self, ea_name): # GH 38386 From a3cbfec06672f1c3e4a2b6005fbe3656f0675dbe Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 13 Dec 2020 10:31:11 -0500 Subject: [PATCH 4/5] mypy issue --- pandas/tests/frame/indexing/test_setitem.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 4a65debb5c84c..aa02fe9887fab 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -200,13 +200,17 @@ def test_setitem_extension_types(self, obj, dtype): @pytest.mark.parametrize( "ea_name", - # property would require instantiation + # mypy doesn't allow adding lists of different types + # https://github.com/python/mypy/issues/5492 [ - dtype.name - for dtype in ea_registry.dtypes - if not isinstance(dtype.name, property) - ] - + ["datetime64[ns, UTC]", "period[D]"], + *[ + dtype.name + for dtype in ea_registry.dtypes + # property would require instantiation + if not isinstance(dtype.name, property) + ], + *["datetime64[ns, UTC]", "period[D]"], + ], ) def test_setitem_with_ea_name(self, ea_name): # GH 38386 From 605d3f3d838ecadd38d44f0c679abbb44d774cf4 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 13 Dec 2020 17:49:11 -0500 Subject: [PATCH 5/5] Refinements from feedback --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/io/parsers.py | 17 ++++++++++------- pandas/tests/frame/indexing/test_setitem.py | 16 +++++++--------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3eefd8c237d43..da8487d52ca2f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -856,7 +856,7 @@ Other - Bug in :meth:`Index.drop` raising ``InvalidIndexError`` when index has duplicates (:issue:`38051`) - Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) - Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`) -- Bug in :func:`.is_bool_dtype` would raise when passed a valid string such as ``"boolean"`` (:issue:`38386`). +- Bug in :func:`.is_bool_dtype` would raise when passed a valid string such as ``"boolean"`` (:issue:`38386`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7a366fdda9dfe..10ef8e77731fb 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1690,7 +1690,7 @@ def _convert_to_ndarrays( ) else: is_ea = is_extension_array_dtype(cast_type) - is_str_or_ea_dtype = is_string_dtype(cast_type) or is_ea + is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type) # skip inference if specified dtype is object # or casting to an EA try_num_bool = not (cast_type and is_str_or_ea_dtype) @@ -1705,12 +1705,15 @@ def _convert_to_ndarrays( not is_dtype_equal(cvals, cast_type) or is_extension_array_dtype(cast_type) ): - try: - if not is_ea and na_count > 0 and is_bool_dtype(cast_type): - raise ValueError(f"Bool column has NA values in column {c}") - except (AttributeError, TypeError): - # invalid input to is_bool_dtype - pass + if not is_ea and na_count > 0: + try: + if is_bool_dtype(cast_type): + raise ValueError( + f"Bool column has NA values in column {c}" + ) + except (AttributeError, TypeError): + # invalid input to is_bool_dtype + pass cvals = self._cast_types(cvals, cast_type, c) result[c] = cvals diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index aa02fe9887fab..19d2f8301037a 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -200,17 +200,15 @@ def test_setitem_extension_types(self, obj, dtype): @pytest.mark.parametrize( "ea_name", + [ + dtype.name + for dtype in ea_registry.dtypes + # property would require instantiation + if not isinstance(dtype.name, property) + ] # mypy doesn't allow adding lists of different types # https://github.com/python/mypy/issues/5492 - [ - *[ - dtype.name - for dtype in ea_registry.dtypes - # property would require instantiation - if not isinstance(dtype.name, property) - ], - *["datetime64[ns, UTC]", "period[D]"], - ], + + ["datetime64[ns, UTC]", "period[D]"], # type: ignore[list-item] ) def test_setitem_with_ea_name(self, ea_name): # GH 38386