From 57aa4428d64bc517ae17fcbef2d22446151dbe06 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 13 Jun 2023 11:31:00 -0700 Subject: [PATCH] BUG: convert_dtypes(dtype_backend=numpy_nullable) with ArrowDtype --- doc/source/whatsnew/v2.0.3.rst | 2 +- pandas/core/dtypes/cast.py | 5 +++++ pandas/tests/frame/methods/test_convert_dtypes.py | 10 +++++++++- pandas/tests/series/methods/test_convert_dtypes.py | 8 ++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 3da469c2e1fe6..2570ef7d28d41 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -21,10 +21,10 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in :func:`DataFrame.convert_dtypes` and :func:`Series.convert_dtypes` when trying to convert :class:`ArrowDtype` with ``dtype_backend="numpy_nullable"`` (:issue:`53648`) - Bug in :func:`RangeIndex.union` when using ``sort=True`` with another :class:`RangeIndex` (:issue:`53490`) - Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`) - Bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` with ``expand=True`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`53532`) -- .. --------------------------------------------------------------------------- .. 
_whatsnew_203.other: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 831b368f58225..58488db10d2ee 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -79,6 +79,8 @@ notna, ) +from pandas.io._util import _arrow_dtype_mapping + if TYPE_CHECKING: from pandas._typing import ( ArrayLike, @@ -1110,6 +1112,9 @@ def convert_dtypes( pa_type = to_pyarrow_type(base_dtype) if pa_type is not None: inferred_dtype = ArrowDtype(pa_type) + elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype): + # GH 53648 + inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype] # error: Incompatible return value type (got "Union[str, Union[dtype[Any], # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]") diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index 2adee158379bb..082ef025992dd 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -146,7 +146,7 @@ def test_pyarrow_engine_lines_false(self): with pytest.raises(ValueError, match=msg): df.convert_dtypes(dtype_backend="numpy") - def test_pyarrow_backend_no_convesion(self): + def test_pyarrow_backend_no_conversion(self): # GH#52872 pytest.importorskip("pyarrow") df = pd.DataFrame({"a": [1, 2], "b": 1.5, "c": True, "d": "x"}) @@ -159,3 +159,11 @@ def test_pyarrow_backend_no_convesion(self): dtype_backend="pyarrow", ) tm.assert_frame_equal(result, expected) + + def test_convert_dtypes_pyarrow_to_np_nullable(self): + # GH 53648 + pytest.importorskip("pyarrow") + ser = pd.DataFrame(range(2), dtype="int32[pyarrow]") + result = ser.convert_dtypes(dtype_backend="numpy_nullable") + expected = pd.DataFrame(range(2), dtype="Int32") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 
d91cd6a43daea..b0f5093e4951d 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -240,3 +240,11 @@ def test_convert_dtype_object_with_na_float(self, infer_objects, dtype): result = ser.convert_dtypes(infer_objects=infer_objects) expected = pd.Series([1.5, pd.NA], dtype=dtype) tm.assert_series_equal(result, expected) + + def test_convert_dtypes_pyarrow_to_np_nullable(self): + # GH 53648 + pytest.importorskip("pyarrow") + ser = pd.Series(range(2), dtype="int32[pyarrow]") + result = ser.convert_dtypes(dtype_backend="numpy_nullable") + expected = pd.Series(range(2), dtype="Int32") + tm.assert_series_equal(result, expected)