From c4c168b29a7ab38a4cc0255d4e60d323198c7d66 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 4 Jan 2023 14:07:44 -0800 Subject: [PATCH 1/3] BUG: is_numeric_dtype(ArrowDtype[numeric]) not returning True --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/dtypes/common.py | 2 ++ pandas/tests/dtypes/test_common.py | 18 ++++++++++++++++++ pandas/tests/extension/test_arrow.py | 18 +++++++++++++++++- 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index ea6a832d25058..8182af3f4bca2 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -983,6 +983,7 @@ ExtensionArray - Bug in :meth:`Series.round` for pyarrow-backed dtypes raising ``AttributeError`` (:issue:`50437`) - Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) - Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`) +- Bug in :meth:`api.types.is_numeric_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``_is_numeric`` returned ``True`` (:issue:`50563`) Styler ^^^^^^ diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4735731e8d6d9..aae815bb68e05 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1200,6 +1200,8 @@ def is_numeric_dtype(arr_or_dtype) -> bool: """ return _is_dtype_type( arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) + ) or _is_dtype( + arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ._is_numeric ) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c08514900af7c..ce900ff649eec 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -556,6 +556,24 @@ def test_is_numeric_dtype(): assert com.is_numeric_dtype(pd.Series([1, 2])) assert com.is_numeric_dtype(pd.Index([1, 2.0])) + class MyNumericDType(ExtensionDtype): + @property + def type(self): + return str + + @property + def name(self): + raise NotImplementedError + + @classmethod + def construct_array_type(cls): + raise NotImplementedError + + def _is_numeric(self) -> bool: + return True + + assert com.is_numeric_dtype(MyNumericDType()) + def test_is_float_dtype(): assert not com.is_float_dtype(str) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index c1785591f41a9..c16b00b89e726 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -37,7 +37,10 @@ import pandas as pd import pandas._testing as tm -from pandas.api.types import is_bool_dtype +from pandas.api.types import ( + is_bool_dtype, + is_numeric_dtype, +) from pandas.tests.extension import base pa = pytest.importorskip("pyarrow", minversion="1.0.1") @@ -1446,6 +1449,19 @@ def test_is_bool_dtype(): tm.assert_series_equal(result, expected) +def test_is_numeric_dtype(data): + # GH 50563 + pa_type = data.dtype.pyarrow_dtype + if ( + pa.types.is_floating(pa_type) + or pa.types.is_integer(pa_type) + or pa.types.is_decimal(pa_type) + ): + assert is_numeric_dtype(data) + else: + assert not is_numeric_dtype(data) + + def test_pickle_roundtrip(data): # GH 42600 expected = pd.Series(data) From 2c310c4800a7d7da731ee5c78896b9050f1f3c8d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 5 Jan 2023 11:37:31 -0800 Subject: [PATCH 2/3] Adjust decimal type to be numeric --- .../tests/io/json/test_json_table_schema_ext_dtype.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index cf521aafdc241..d9232a6bddf61 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -48,7 +48,7 @@ def test_build_table_schema(self): "fields": [ {"name": "index", "type": "integer"}, {"name": "A", "type": "any", "extDtype": "DateDtype"}, - {"name": "B", "type": "any", "extDtype": "decimal"}, + {"name": "B", "type": "number", "extDtype": "decimal"}, {"name": "C", "type": "any", "extDtype": "string"}, {"name": "D", "type": "integer", "extDtype": "Int64"}, ], @@ -82,10 +82,10 @@ def test_as_json_table_type_ext_date_dtype(self): ], ) def test_as_json_table_type_ext_decimal_array_dtype(self, decimal_data): - assert as_json_table_type(decimal_data.dtype) == "any" + assert as_json_table_type(decimal_data.dtype) == "number" def test_as_json_table_type_ext_decimal_dtype(self): - assert as_json_table_type(DecimalDtype()) == "any" + assert as_json_table_type(DecimalDtype()) == "number" @pytest.mark.parametrize( "string_data", @@ -180,7 +180,7 @@ def test_build_decimal_series(self, dc): fields = [ {"name": "id", "type": "integer"}, - {"name": "a", "type": "any", "extDtype": "decimal"}, + {"name": "a", "type": "number", "extDtype": "decimal"}, ] schema = {"fields": fields, "primaryKey": ["id"]} @@ -257,7 +257,7 @@ def test_to_json(self, df): fields = [ OrderedDict({"name": "idx", "type": "integer"}), OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}), - OrderedDict({"name": "B", "type": "any", "extDtype": "decimal"}), + OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}), OrderedDict({"name": "C", "type": "any", "extDtype": "string"}), OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}), ] From 8b4e6cfcf4ba18f581ef52154bc2c3b71664dedb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 5 Jan 2023 13:09:12 -0800 Subject: [PATCH 3/3] Some arrow tests now working --- pandas/tests/extension/test_arrow.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index c16b00b89e726..848f706986b2f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -553,16 +553,6 @@ def test_groupby_extension_apply( ): super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) - def test_in_numeric_groupby(self, data_for_grouping, request): - pa_dtype = data_for_grouping.dtype.pyarrow_dtype - if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="ArrowExtensionArray doesn't support .sum() yet.", - ) - ) - super().test_in_numeric_groupby(data_for_grouping) - @pytest.mark.parametrize("as_index", [True, False]) def test_groupby_extension_agg(self, as_index, data_for_grouping, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype