diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 7920820b32620..8fdb442b6e16d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -314,7 +314,7 @@ Conversion Strings ^^^^^^^ -- +- Bug in the :meth:`~Series.astype` method when converting "string" dtype data to nullable integer dtype (:issue:`32450`). - diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f82790ac4c3d9..dbca8e74f5e1b 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -13,7 +13,8 @@ from pandas import compat from pandas.core import ops -from pandas.core.arrays import PandasArray +from pandas.core.arrays import IntegerArray, PandasArray +from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna @@ -271,6 +272,13 @@ def astype(self, dtype, copy=True): if copy: return self.copy() return self + elif isinstance(dtype, _IntegerDtype): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = 0 + values = arr.astype(dtype.numpy_dtype) + return IntegerArray(values, mask, copy=False) + return super().astype(dtype, copy) def _reduce(self, name, skipna=True, **kwargs): diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 5e2f14af341ab..fe770eed84b62 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -214,6 +214,14 @@ def test_from_sequence_no_mutate(copy): tm.assert_numpy_array_equal(a, original) +def test_astype_int(): + arr = pd.array(["1", pd.NA, "3"], dtype="string") + + result = arr.astype("Int64") + expected = pd.array([1, pd.NA, 3], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce(skipna):