From 1925fd5ac735027fee8b70a5440e96b611900d64 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 11 Feb 2023 09:08:37 +0000 Subject: [PATCH 1/3] BUG: non-64-bit numeric dtypes should raise in IntervalDtype constructor --- doc/source/whatsnew/v2.0.0.rst | 2 ++ pandas/core/arrays/interval.py | 1 + pandas/core/dtypes/common.py | 50 ++++++++++++++++++++++++++++++ pandas/core/dtypes/dtypes.py | 8 +++++ pandas/tests/dtypes/test_dtypes.py | 16 +++++++--- 5 files changed, 72 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 29f360e050548..6361d22f0ba73 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1238,6 +1238,8 @@ Interval - Bug in :meth:`IntervalIndex.is_overlapping` incorrect output if interval has duplicate left boundaries (:issue:`49581`) - Bug in :meth:`Series.infer_objects` failing to infer :class:`IntervalDtype` for an object series of :class:`Interval` objects (:issue:`50090`) - Bug in :meth:`Series.shift` with :class:`IntervalDtype` and invalid null ``fill_value`` failing to raise ``TypeError`` (:issue:`51258`) +- Bug in :class:`IntervalDtype` where it accepted non-64-bit numeric subtypes, even though :class:`arrays.IntervalArray` can only hold numeric data if it is 64-bit. 
+ Supplying non-64-bit numeric subtypes to :class:`IntervalDtype` now raises a ``TypeError`` (:issue:`45412`) - Indexing diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 6805d32049d34..a0a172b97d390 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -270,6 +270,7 @@ def __new__( copy=copy, dtype=dtype, ) + 1 / 0 if verify_integrity: cls._validate(left, right, dtype=dtype) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 3444ad77c2981..009634bd3e030 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -863,6 +863,56 @@ def is_int64_dtype(arr_or_dtype) -> bool: return _is_dtype_type(arr_or_dtype, classes(np.int64)) +def is_64bit_real_numeric_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of 64-bit dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of 64-bit dtype. 
+ + Examples + -------- + >>> is_64bit_real_numeric_dtype(str) + False + >>> is_64bit_real_numeric_dtype(np.int32) + False + >>> is_64bit_real_numeric_dtype(np.int64) + True + >>> is_64bit_real_numeric_dtype('int8') + False + >>> is_64bit_real_numeric_dtype('Int8') + True + >>> is_64bit_real_numeric_dtype(pd.Int64Dtype) + True + >>> is_64bit_real_numeric_dtype(float) + True + >>> is_64bit_real_numeric_dtype(np.uint64) # unsigned + True + >>> is_64bit_real_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_64bit_real_numeric_dtype(np.array([1, 2], dtype=np.int64)) + True + >>> is_64bit_real_numeric_dtype(pd.Index([1, 2.])) # float + True + >>> is_64bit_real_numeric_dtype(np.array([1, 2], dtype=np.uint32)) + False + """ + return _is_dtype_type( + arr_or_dtype, classes(np.int64, np.uint64, np.float64) + ) or _is_dtype( + arr_or_dtype, + lambda typ: isinstance(typ, ExtensionDtype) + and typ.type in (np.int64, np.uint64, np.float64), + ) + + def is_datetime64_any_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the datetime64 dtype. 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 33ff6d1eee686..2d67af4a8b5d2 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1082,6 +1082,8 @@ class IntervalDtype(PandasExtensionDtype): def __new__(cls, subtype=None, closed: str_type | None = None): from pandas.core.dtypes.common import ( + is_64bit_real_numeric_dtype, + is_any_real_numeric_dtype, is_string_dtype, pandas_dtype, ) @@ -1132,6 +1134,12 @@ def __new__(cls, subtype=None, closed: str_type | None = None): "for IntervalDtype" ) raise TypeError(msg) + elif is_any_real_numeric_dtype(subtype) and not is_64bit_real_numeric_dtype( + subtype + ): + raise TypeError( + f"numeric subtype must be 64-bit numeric dtype, was {subtype}" + ) key = f"{subtype}{closed}" try: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index d054fb59d8561..aae22e86ca88e 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -8,6 +8,7 @@ from pandas.core.dtypes.base import _registry as registry from pandas.core.dtypes.common import ( + is_64bit_real_numeric_dtype, is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, @@ -16,6 +17,7 @@ is_datetime64tz_dtype, is_dtype_equal, is_interval_dtype, + is_numeric_dtype, is_period_dtype, is_string_dtype, ) @@ -597,6 +599,7 @@ def test_construction_generic(self, subtype): @pytest.mark.parametrize( "subtype", [ + *[x for x in tm.ALL_REAL_DTYPES if not is_64bit_real_numeric_dtype(x)], CategoricalDtype(list("abc"), False), CategoricalDtype(list("wxyz"), True), object, @@ -607,11 +610,14 @@ def test_construction_generic(self, subtype): ], ) def test_construction_not_supported(self, subtype): - # GH 19016 - msg = ( - "category, object, and string subtypes are not supported " - "for IntervalDtype" - ) + # GH19016, GH45412 + if is_numeric_dtype(subtype): + msg = "numeric subtype must be 64-bit numeric dtype, was" + else: + msg = ( + "category, object, and string 
subtypes are not supported " + "for IntervalDtype" + ) with pytest.raises(TypeError, match=msg): IntervalDtype(subtype) From 1df0f0f01e033a49a996afcb5678abc34444048a Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 12 Feb 2023 11:23:45 +0000 Subject: [PATCH 2/3] remove debug statement --- pandas/core/arrays/interval.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index a0a172b97d390..6805d32049d34 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -270,7 +270,6 @@ def __new__( copy=copy, dtype=dtype, ) - 1 / 0 if verify_integrity: cls._validate(left, right, dtype=dtype) From b1a039ad46a6075f4b7ff4b6534cb24d5eea5812 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 12 Feb 2023 13:59:46 +0000 Subject: [PATCH 3/3] fix doc tests --- pandas/core/dtypes/common.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 009634bd3e030..57a43dfbf9698 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -885,15 +885,17 @@ def is_64bit_real_numeric_dtype(arr_or_dtype) -> bool: False >>> is_64bit_real_numeric_dtype(np.int64) True + >>> is_64bit_real_numeric_dtype(pd.Int64Dtype()) + True + >>> is_64bit_real_numeric_dtype("Int64") + True >>> is_64bit_real_numeric_dtype('int8') False >>> is_64bit_real_numeric_dtype('Int8') - True - >>> is_64bit_real_numeric_dtype(pd.Int64Dtype) - True + False >>> is_64bit_real_numeric_dtype(float) True - >>> is_64bit_real_numeric_dtype(np.uint64) # unsigned + >>> is_64bit_real_numeric_dtype(np.uint64) True >>> is_64bit_real_numeric_dtype(np.array(['a', 'b'])) False