From ee001a90396cbd89be730f71eb139e29cd1ba503 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 18:46:01 -0800 Subject: [PATCH 1/7] BUG: FloatingArray(float16data) --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/core/arrays/floating.py | 4 ++++ pandas/core/arrays/masked.py | 4 ++++ pandas/core/construction.py | 7 ++++++- pandas/tests/arrays/boolean/test_function.py | 11 +++++++++++ pandas/tests/arrays/floating/test_construction.py | 12 ++++++++++++ pandas/tests/arrays/test_array.py | 6 ++++++ 7 files changed, 45 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fd7cb6a69d955..1f9b8271d7c8b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -803,6 +803,8 @@ ExtensionArray - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`) - Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`) - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`) +- Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`??`) +- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`??`) - Styler diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 5e55715ee0e97..396ed7eb4abeb 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -254,6 +254,10 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): "values should be floating numpy array. 
Use " "the 'pd.array' function instead" ) + if values.dtype == np.float16: + # If we don't raise here, then accessing self.dtype would raise + raise TypeError("FloatingArray does not support np.float16 dtype.") + super().__init__(values, mask, copy=copy) @classmethod diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index cc61fb4d93ffc..a882fe5d2da21 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -466,6 +466,10 @@ def reconstruct(x): return IntegerArray(x, m) elif is_float_dtype(x.dtype): m = mask.copy() + if x.dtype == np.float16: + # reached in e.g. np.sqrt on BooleanArray + # we don't support float16 + x = x.astype(np.float32) return FloatingArray(x, m) else: x[mask] = np.nan diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e3b41f2c7b8c2..bda45b1b3ac52 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -353,7 +353,12 @@ def array( elif inferred_dtype == "integer": return IntegerArray._from_sequence(data, copy=copy) - elif inferred_dtype in ("floating", "mixed-integer-float"): + elif ( + inferred_dtype in ("floating", "mixed-integer-float") + and getattr(data, "dtype", None) != np.float16 + ): + # Exclude np.float16 bc FloatingArray does not support it; + # we will fall back to PandasArray. 
return FloatingArray._from_sequence(data, copy=copy) elif inferred_dtype == "boolean": diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 2f1a3121cdf5b..db362afc80087 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -66,6 +66,17 @@ def test_ufuncs_unary(ufunc): tm.assert_series_equal(result, expected) +def test_ufunc_numeric(): + # np.sqrt on np.bool returns float16, which we upcast to Float32 + # bc we do not have Float16 + arr = pd.array([True, False, None], dtype="boolean") + + res = np.sqrt(arr) + + expected = pd.array([1, 0, None], dtype="Float32") + tm.assert_extension_array_equal(res, expected) + + @pytest.mark.parametrize("values", [[True, False], [True, None]]) def test_ufunc_reduce_raises(values): a = pd.array(values, dtype="boolean") diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 4b7b237d2eb7c..20fa5697de8bc 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -40,6 +40,18 @@ def test_floating_array_constructor(): FloatingArray(values) +def test_floating_array_disallows_float16(): + arr = np.array([1, 2], dtype=np.float16) + mask = np.array([False, False]) + + msg = "FloatingArray does not support np.float16 dtype" + with pytest.raises(TypeError, match=msg): + FloatingArray(arr, mask) + + with pytest.raises(TypeError, match="data type 'Float16' not understood"): + pd.array([1.0, 2.0], dtype="Float16") + + def test_floating_array_constructor_copy(): values = np.array([1, 2, 3, 4], dtype="float64") mask = np.array([False, False, False, True], dtype="bool") diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index e953e7dc6dcba..98bdcdcee1960 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -51,6 +51,12 @@ # String alias 
passes through to NumPy ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))), ([1, 2], "int64", PandasArray(np.array([1, 2], dtype=np.int64))), + # FloatingArray does not support float16, so we fall back to PandasArray + ( + np.array([1, 2], dtype=np.float16), + None, + PandasArray(np.array([1, 2], dtype=np.float16)), + ), # idempotency with e.g. pd.array(pd.array([1, 2], dtype="int64")) ( PandasArray(np.array([1, 2], dtype=np.int32)), From 3524338dfce91c866206b59718a11812ef3d900e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 18:48:26 -0800 Subject: [PATCH 2/7] GH ref --- doc/source/whatsnew/v1.4.0.rst | 4 ++-- pandas/core/construction.py | 2 +- pandas/tests/arrays/floating/test_construction.py | 1 + pandas/tests/arrays/test_array.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1f9b8271d7c8b..87e00ce75da7a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -803,8 +803,8 @@ ExtensionArray - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`) - Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`) - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). 
This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`) -- Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`??`) -- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`??`) +- Bug in :func:`array` incorrectly raising when passed an ``ndarray`` with ``float16`` dtype (:issue:`44715`) +- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`) - Styler diff --git a/pandas/core/construction.py b/pandas/core/construction.py index bda45b1b3ac52..8261162afe187 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -357,7 +357,7 @@ def array( inferred_dtype in ("floating", "mixed-integer-float") and getattr(data, "dtype", None) != np.float16 ): - # Exclude np.float16 bc FloatingArray does not support it; + # GH#44715 Exclude np.float16 bc FloatingArray does not support it; # we will fall back to PandasArray. 
return FloatingArray._from_sequence(data, copy=copy) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 20fa5697de8bc..ca01b2f4897cc 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -41,6 +41,7 @@ def test_floating_array_constructor(): def test_floating_array_disallows_float16(): + # GH#44715 arr = np.array([1, 2], dtype=np.float16) mask = np.array([False, False]) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 98bdcdcee1960..7f612e77c4ad3 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -51,7 +51,7 @@ # String alias passes through to NumPy ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))), ([1, 2], "int64", PandasArray(np.array([1, 2], dtype=np.int64))), - # FloatingArray does not support float16, so we fall back to PandasArray + # GH#44715 FloatingArray does not support float16, so fall back to PandasArray ( np.array([1, 2], dtype=np.float16), None, From fe0a0029e3745c512491db0944fe88aaae4a23bc Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 11:01:04 -0800 Subject: [PATCH 3/7] xfail --- pandas/tests/arrays/floating/test_construction.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index ca01b2f4897cc..b8ef067da4b7a 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.compat import np_version_under1p19 + import pandas as pd import pandas._testing as tm from pandas.core.arrays import FloatingArray @@ -40,7 +42,7 @@ def test_floating_array_constructor(): FloatingArray(values) -def test_floating_array_disallows_float16(): +def 
test_floating_array_disallows_float16(request): # GH#44715 arr = np.array([1, 2], dtype=np.float16) mask = np.array([False, False]) @@ -49,6 +51,10 @@ def test_floating_array_disallows_float16(): with pytest.raises(TypeError, match=msg): FloatingArray(arr, mask) + if np_version_under1p19: + mark = pytest.mark.xfail(reason="numpy does not raise on np.dtype('Float16')") + request.node.add_marker(mark) + with pytest.raises(TypeError, match="data type 'Float16' not understood"): pd.array([1.0, 2.0], dtype="Float16") From d3a3fa3737ddc6d994738289faabb70bda049209 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 21:00:51 -0800 Subject: [PATCH 4/7] xfail --- pandas/tests/arrays/floating/test_construction.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index b8ef067da4b7a..c0a2c0629dc6f 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -1,3 +1,5 @@ +import locale + import numpy as np import pytest @@ -51,7 +53,9 @@ def test_floating_array_disallows_float16(request): with pytest.raises(TypeError, match=msg): FloatingArray(arr, mask) - if np_version_under1p19: + if np_version_under1p19 or locale.getlocale()[0] != "en_US": + # the locale condition may need to be refined; this fails on + # the CI in the ZH_CN build mark = pytest.mark.xfail(reason="numpy does not raise on np.dtype('Float16')") request.node.add_marker(mark) From a51581768229d36a60eeba2c8670a2414bce6dd9 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Dec 2021 12:58:27 -0800 Subject: [PATCH 5/7] troubleshoot --- pandas/tests/arrays/floating/test_construction.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index c0a2c0629dc6f..e3a8cd706ed72 100644 --- 
a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -53,6 +53,10 @@ def test_floating_array_disallows_float16(request): with pytest.raises(TypeError, match=msg): FloatingArray(arr, mask) + # Troubleshoot https://github.com/numpy/numpy/issues/20512#issuecomment-985807740 + lowered = np.core._type_aliases.english_lower("Float16") + assert lowered == "float16", lowered + if np_version_under1p19 or locale.getlocale()[0] != "en_US": # the locale condition may need to be refined; this fails on # the CI in the ZH_CN build From 26ecaaea00c960cb66133e1a62dc26575de06f18 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Dec 2021 12:58:59 -0800 Subject: [PATCH 6/7] only on newer numpy --- pandas/tests/arrays/floating/test_construction.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index e3a8cd706ed72..a4aa21c76f3c0 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -53,9 +53,11 @@ def test_floating_array_disallows_float16(request): with pytest.raises(TypeError, match=msg): FloatingArray(arr, mask) - # Troubleshoot https://github.com/numpy/numpy/issues/20512#issuecomment-985807740 - lowered = np.core._type_aliases.english_lower("Float16") - assert lowered == "float16", lowered + if not np_version_under1p19: + # Troubleshoot + # https://github.com/numpy/numpy/issues/20512#issuecomment-985807740 + lowered = np.core._type_aliases.english_lower("Float16") + assert lowered == "float16", lowered if np_version_under1p19 or locale.getlocale()[0] != "en_US": # the locale condition may need to be refined; this fails on From 1e963be5384dc6614f7b7c7822a1209a0d67dc99 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Dec 2021 14:42:39 -0800 Subject: [PATCH 7/7] troubleshoot --- pandas/tests/arrays/floating/test_construction.py | 9 
+++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index a4aa21c76f3c0..703bdb3dbd5bb 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas.compat import np_version_under1p19 +from pandas.compat import ( + is_platform_windows, + np_version_under1p19, +) import pandas as pd import pandas._testing as tm @@ -59,7 +62,9 @@ def test_floating_array_disallows_float16(request): lowered = np.core._type_aliases.english_lower("Float16") assert lowered == "float16", lowered - if np_version_under1p19 or locale.getlocale()[0] != "en_US": + if np_version_under1p19 or ( + locale.getlocale()[0] != "en_US" and not is_platform_windows() + ): # the locale condition may need to be refined; this fails on # the CI in the ZH_CN build mark = pytest.mark.xfail(reason="numpy does not raise on np.dtype('Float16')")