From 18582fdd1b254ef76cac29061655be2832bba195 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Sat, 20 Jan 2024 13:06:16 -0500 Subject: [PATCH 1/4] raise error on unsafe decimal parse with pyarrow types --- pandas/core/arrays/arrow/array.py | 6 +----- pandas/tests/extension/test_arrow.py | 9 +++++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index bf04d86e8e476..44e12c3361e5d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -527,11 +527,7 @@ def _box_pa_array( else: try: pa_array = pa_array.cast(pa_type) - except ( - pa.ArrowInvalid, - pa.ArrowTypeError, - pa.ArrowNotImplementedError, - ): + except pa.ArrowNotImplementedError: if pa.types.is_string(pa_array.type) or pa.types.is_large_string( pa_array.type ): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 05a112e464677..7e595a79089dd 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3203,6 +3203,15 @@ def test_pow_missing_operand(): tm.assert_series_equal(result, expected) +def test_decimal_parse_raises(): + # GH 56984 + ser = pd.Series(["1.2345"], dtype=ArrowDtype(pa.string())) + with pytest.raises( + pa.lib.ArrowInvalid, match="Rescaling Decimal128 value would cause data loss" + ): + ser.astype(ArrowDtype(pa.decimal128(1, 0))) + + @pytest.mark.parametrize("pa_type", tm.TIMEDELTA_PYARROW_DTYPES) def test_duration_fillna_numpy(pa_type): # GH 54707 From 50f883bcc5723acb80d2a72e09975b75619fee1d Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Sat, 20 Jan 2024 13:43:48 -0500 Subject: [PATCH 2/4] fix min versions --- pandas/tests/extension/test_arrow.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7e595a79089dd..41048fc209c4c 100644 --- a/pandas/tests/extension/test_arrow.py +++ 
b/pandas/tests/extension/test_arrow.py @@ -3203,6 +3203,9 @@ def test_pow_missing_operand(): tm.assert_series_equal(result, expected) +@pytest.mark.skipif( + pa_version_under11p0, reason="Decimal128 to string cast implemented in pyarrow 11" +) def test_decimal_parse_raises(): # GH 56984 ser = pd.Series(["1.2345"], dtype=ArrowDtype(pa.string())) From 2dbafa7ebcec5a7bd89647d2aac8370656cfc9f7 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Sat, 20 Jan 2024 16:23:00 -0500 Subject: [PATCH 3/4] restore ArrowTypeError --- pandas/core/arrays/arrow/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 44e12c3361e5d..392b4e3cc616a 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -527,7 +527,7 @@ def _box_pa_array( else: try: pa_array = pa_array.cast(pa_type) - except pa.ArrowNotImplementedError: + except (pa.ArrowNotImplementedError, pa.ArrowTypeError): if pa.types.is_string(pa_array.type) or pa.types.is_large_string( pa_array.type ): From 9a84f4efc52ca37ce651455950da8da14a1be2be Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Mon, 22 Jan 2024 09:06:51 -0500 Subject: [PATCH 4/4] success --- pandas/tests/extension/test_arrow.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 41048fc209c4c..6970c589dd36f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3215,6 +3215,18 @@ def test_decimal_parse_raises(): ser.astype(ArrowDtype(pa.decimal128(1, 0))) +@pytest.mark.skipif( + pa_version_under11p0, reason="Decimal128 to string cast implemented in pyarrow 11" +) +def test_decimal_parse_succeeds(): + # GH 56984 + ser = pd.Series(["1.2345"], dtype=ArrowDtype(pa.string())) + dtype = ArrowDtype(pa.decimal128(5, 4)) + result = ser.astype(dtype) + expected = pd.Series([Decimal("1.2345")], dtype=dtype) + 
tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("pa_type", tm.TIMEDELTA_PYARROW_DTYPES) def test_duration_fillna_numpy(pa_type): # GH 54707