diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a81fb584c8df9..910ef278a22a9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -389,6 +389,7 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 1154130b9bed3..0240433cdb683 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -525,6 +525,8 @@ def _box_pa_array( if pa_type is not None and pa_array.type != pa_type: if pa.types.is_dictionary(pa_type): pa_array = pa_array.dictionary_encode() + if pa_array.type != pa_type: + pa_array = pa_array.cast(pa_type) else: try: pa_array = pa_array.cast(pa_type) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 79440b55dd5dd..7d31fe6085c3a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3498,6 +3498,14 @@ def test_to_numpy_timestamp_to_int(): tm.assert_numpy_array_equal(result, expected) +@pytest.mark.parametrize("arrow_type", [pa.large_string(), pa.string()]) +def test_cast_dictionary_different_value_dtype(arrow_type): + df = pd.DataFrame({"a": ["x", "y"]}, dtype="string[pyarrow]") + data_type = ArrowDtype(pa.dictionary(pa.int32(), arrow_type)) + result = df.astype({"a": data_type}) + assert result.dtypes.iloc[0] == data_type + + def test_map_numeric_na_action(): ser = pd.Series([32, 40, None], dtype="int64[pyarrow]") result = ser.map(lambda x: 42, na_action="ignore")