From ff0c89c47a9545944d18cca66b86d2656f8ede0f Mon Sep 17 00:00:00 2001
From: Luke Manley
Date: Tue, 22 Aug 2023 21:45:47 -0400
Subject: [PATCH 1/2] use ArrowExtensionArray._box_pa in fillna

---
 pandas/core/arrays/arrow/array.py | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 48ff769f6c737..4ba0e70393efb 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -381,8 +381,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
         elif isna(value):
             pa_scalar = pa.scalar(None, type=pa_type)
         else:
-            # GH 53171: pyarrow does not yet handle pandas non-nano correctly
-            # see https://github.com/apache/arrow/issues/33321
+            # Workaround https://github.com/apache/arrow/issues/37291
             if isinstance(value, Timedelta):
                 if pa_type is None:
                     pa_type = pa.duration(value.unit)
@@ -448,8 +447,7 @@ def _box_pa_array(
                 and pa.types.is_duration(pa_type)
                 and (not isinstance(value, np.ndarray) or value.dtype.kind not in "mi")
             ):
-                # GH 53171: pyarrow does not yet handle pandas non-nano correctly
-                # see https://github.com/apache/arrow/issues/33321
+                # Workaround https://github.com/apache/arrow/issues/37291
                 from pandas.core.tools.timedeltas import to_timedelta
 
                 value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
@@ -462,8 +460,7 @@ def _box_pa_array(
                 pa_array = pa.array(value, from_pandas=True)
 
             if pa_type is None and pa.types.is_duration(pa_array.type):
-                # GH 53171: pyarrow does not yet handle pandas non-nano correctly
-                # see https://github.com/apache/arrow/issues/33321
+                # Workaround https://github.com/apache/arrow/issues/37291
                 from pandas.core.tools.timedeltas import to_timedelta
 
                 value = to_timedelta(value)
@@ -965,26 +962,11 @@ def fillna(
                     f" expected {len(self)}"
                 )
 
-        def convert_fill_value(value, pa_type, dtype):
-            if value is None:
-                return value
-            if isinstance(value, (pa.Scalar, pa.Array, pa.ChunkedArray)):
-                return value
-            if isinstance(value, Timedelta) and value.unit in ("s", "ms"):
-                # Workaround https://github.com/apache/arrow/issues/37291
-                value = value.to_numpy()
-            if is_array_like(value):
-                pa_box = pa.array
-            else:
-                pa_box = pa.scalar
-            try:
-                value = pa_box(value, type=pa_type, from_pandas=True)
-            except pa.ArrowTypeError as err:
-                msg = f"Invalid value '{str(value)}' for dtype {dtype}"
-                raise TypeError(msg) from err
-            return value
-
-        fill_value = convert_fill_value(value, self._pa_array.type, self.dtype)
+        try:
+            fill_value = self._box_pa(value, pa_type=self._pa_array.type)
+        except pa.ArrowTypeError as err:
+            msg = f"Invalid value '{str(value)}' for dtype {self.dtype}"
+            raise TypeError(msg) from err
 
         try:
             if method is None:

From 55e61e78483e191d93f7c80c74129618048cc62e Mon Sep 17 00:00:00 2001
From: Luke Manley
Date: Wed, 23 Aug 2023 06:31:10 -0400
Subject: [PATCH 2/2] add test

---
 pandas/tests/extension/test_arrow.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index f9c420607812c..890ffd6dfbdd9 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -2997,3 +2997,13 @@ def test_arrowextensiondtype_dataframe_repr():
     # pyarrow.ExtensionType values are displayed
     expected = " col\n0 15340\n1 15341\n2 15342"
     assert result == expected
+
+
+@pytest.mark.parametrize("pa_type", tm.TIMEDELTA_PYARROW_DTYPES)
+def test_duration_fillna_numpy(pa_type):
+    # GH 54707
+    ser1 = pd.Series([None, 2], dtype=ArrowDtype(pa_type))
+    ser2 = pd.Series(np.array([1, 3], dtype=f"m8[{pa_type.unit}]"))
+    result = ser1.fillna(ser2)
+    expected = pd.Series([1, 2], dtype=ArrowDtype(pa_type))
+    tm.assert_series_equal(result, expected)