From 5dba4ccd998f9c6efa36e39184317709e0a9a4d8 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 29 Oct 2024 20:11:46 -0400 Subject: [PATCH 01/11] extra check when casting to boolean --- pandas/core/dtypes/cast.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6ba07b1761557..4c02aa43c3a32 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1918,7 +1918,17 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # i.e. there are pd.NA elements raise LossySetitemError return element + with np.errstate(invalid="ignore"): + # We check afterwards if cast was losslessly, so no need to show + # the warning + casted = element.astype(dtype) + comp = casted == element + if comp.all(): + # Return the casted values bc they can be passed to + # np.putmask, whereas the raw values cannot. + return casted raise LossySetitemError + if lib.is_bool(element): return element raise LossySetitemError From f431efc71e1e61d507338a16454afafb229e4955 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 29 Oct 2024 20:14:09 -0400 Subject: [PATCH 02/11] full check for casting to boolean --- pandas/core/dtypes/cast.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4c02aa43c3a32..974ccdc786fdc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1918,14 +1918,11 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # i.e. there are pd.NA elements raise LossySetitemError return element + # GH 57338 with np.errstate(invalid="ignore"): - # We check afterwards if cast was losslessly, so no need to show - # the warning casted = element.astype(dtype) comp = casted == element if comp.all(): - # Return the casted values bc they can be passed to - # np.putmask, whereas the raw values cannot. return casted raise LossySetitemError From 50f9a20849879907ae05f2c8a4b62c0188cd3e6b Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 29 Oct 2024 20:20:14 -0400 Subject: [PATCH 03/11] specify bug --- pandas/core/dtypes/cast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 974ccdc786fdc..4d4f5789a2e36 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1919,6 +1919,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError return element # GH 57338 + # Check boolean array set as object type with np.errstate(invalid="ignore"): casted = element.astype(dtype) comp = casted == element From 636630368f9b8075a4d5c82f2c99bca79038d974 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 29 Oct 2024 21:39:17 -0400 Subject: [PATCH 04/11] fixed comparison with 1 and 0 --- pandas/core/dtypes/cast.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4d4f5789a2e36..876b267b80beb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1921,9 +1921,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # GH 57338 # Check boolean array set as object type with np.errstate(invalid="ignore"): - casted = element.astype(dtype) - comp = casted == element - if comp.all(): + casted = element.astype(dtype).astype("object") + element_object = element.astype("object") + comp = [i is j for i, j in zip(element_object, casted)] + if all(comp): return casted raise LossySetitemError From 592fc5d32e714dfaa7750041d3a3bc9c337f9c4c Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 29 Oct 2024 21:43:32 -0400 Subject: [PATCH 05/11] return casted as bool --- pandas/core/dtypes/cast.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 876b267b80beb..4064b948f3c57 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1921,9 +1921,11 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # GH 57338 # Check boolean array set as object type with np.errstate(invalid="ignore"): - casted = element.astype(dtype).astype("object") - element_object = element.astype("object") - comp = [i is j for i, j in zip(element_object, casted)] + casted = element.astype(dtype) + comp = [ + i is j + for i, j in zip(element.astype("object"), casted.astype("object")) + ] if all(comp): return casted raise LossySetitemError From 14daddf8387d3a80588f593e3189b14aa52db8dd Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 29 Oct 2024 22:28:16 -0400 Subject: [PATCH 06/11] using lib is_bool to check for bool --- pandas/core/dtypes/cast.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4064b948f3c57..c1513df594f2f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1920,14 +1920,9 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element # GH 57338 # Check boolean array set as object type - with np.errstate(invalid="ignore"): - casted = element.astype(dtype) - comp = [ - i is j - for i, j in zip(element.astype("object"), casted.astype("object")) - ] + comp = [lib.is_bool(e) for e in element] if all(comp): - return casted + return element.astype("bool") raise LossySetitemError if lib.is_bool(element): From d1548b0dc7d50174c95a80acfb5f2a47a2171d67 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 29 Oct 2024 22:54:47 -0400 Subject: [PATCH 07/11] made element iterable --- pandas/core/dtypes/cast.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c1513df594f2f..0327d0730029d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -110,7 +110,6 @@ TimedeltaArray, ) - _int8_max = np.iinfo(np.int8).max _int16_max = np.iinfo(np.int16).max _int32_max = np.iinfo(np.int32).max @@ -1920,7 +1919,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element # GH 57338 # Check boolean array set as object type - comp = [lib.is_bool(e) for e in element] + comp = [lib.is_bool(e) for e in np.array([element]).flatten()] if all(comp): return element.astype("bool") raise LossySetitemError From aee48c98828d956f9b7694137ffe5103883621ff Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 29 Oct 2024 22:57:43 -0400 Subject: [PATCH 08/11] ravel instead of flatten --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0327d0730029d..77575a655b81d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1919,7 +1919,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element # GH 57338 # Check boolean array set as object type - comp = [lib.is_bool(e) for e in np.array([element]).flatten()] + comp = [lib.is_bool(e) for e in np.array([element]).ravel()] if all(comp): return element.astype("bool") raise LossySetitemError From 079c403f804f12c64b5edeff941cf0acbbf430b2 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Thu, 31 Oct 2024 15:21:00 -0400 Subject: [PATCH 09/11] more efficient check --- pandas/core/dtypes/cast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 77575a655b81d..64038edde6bfa 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1919,9 +1919,9 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element # GH 57338 # Check boolean array set as object type - comp = [lib.is_bool(e) for e in np.array([element]).ravel()] - if all(comp): - return element.astype("bool") + if tipo.kind == "O" and isinstance(element, np.ndarray): + if all(lib.is_bool(e) for e in element): + return element.astype("bool") raise LossySetitemError if lib.is_bool(element): From e8c10255b3cc09f1ea7fbb44cc52ace53b62e9ae Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Thu, 31 Oct 2024 15:48:06 -0400 Subject: [PATCH 10/11] added test case --- pandas/tests/dtypes/cast/test_can_hold_element.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py index 3b7d76ead119a..fc56c395f1e0f 100644 --- a/pandas/tests/dtypes/cast/test_can_hold_element.py +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -77,3 +77,17 @@ def test_can_hold_element_int8_int(): assert not can_hold_element(arr, np.uint32(element)) assert not can_hold_element(arr, np.int64(element)) assert not can_hold_element(arr, np.uint64(element)) + + +def test_can_hold_element_bool(): + arr = np.array([], dtype=bool) + + element = True + assert can_hold_element(arr, element) + assert can_hold_element(arr, np.array([element])) + assert can_hold_element(arr, np.array([element], dtype=object)) + + element = 1 + assert not can_hold_element(arr, element) + assert not can_hold_element(arr, np.array([element])) + assert not can_hold_element(arr, np.array([element], dtype=object)) From 0232b8b708a6368cd9d30755c76ae9c12603cd10 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Thu, 31 Oct 2024 16:01:36 -0400 Subject: [PATCH 11/11] format --- pandas/core/dtypes/cast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 64038edde6bfa..e16d3ce3d62b0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -110,6 +110,7 @@ TimedeltaArray, ) + _int8_max = np.iinfo(np.int8).max _int16_max = np.iinfo(np.int16).max _int32_max = np.iinfo(np.int32).max