Skip to content

Commit 2dfd50b

Browse files
simplify fill_null calls for string[pyarrow] case
1 parent: 377ff3a; commit: 2dfd50b

File tree

4 files changed

+20
-62
lines changed

4 files changed

+20
-62
lines changed

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 1 addition & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -9,18 +9,13 @@
99

1010
import numpy as np
1111

12-
from pandas._libs import (
13-
lib,
14-
missing as libmissing,
15-
)
12+
from pandas._libs import lib
1613
from pandas.compat import (
1714
pa_version_under10p1,
1815
pa_version_under13p0,
1916
pa_version_under17p0,
2017
)
2118

22-
from pandas.core.dtypes.missing import isna
23-
2419
if not pa_version_under10p1:
2520
import pyarrow as pa
2621
import pyarrow.compute as pc
@@ -36,12 +31,9 @@
3631
Self,
3732
)
3833

39-
from pandas.core.dtypes.dtypes import ExtensionDtype
40-
4134

4235
class ArrowStringArrayMixin:
4336
_pa_array: Sized
44-
dtype: ExtensionDtype
4537

4638
def __init__(self, *args, **kwargs) -> None:
4739
raise NotImplementedError
@@ -151,12 +143,6 @@ def _str_startswith(
151143

152144
for p in pat[1:]:
153145
result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
154-
if (
155-
self.dtype.na_value is libmissing.NA
156-
and na is not lib.no_default
157-
and not isna(na)
158-
): # pyright: ignore [reportGeneralTypeIssues]
159-
result = result.fill_null(na)
160146
return self._convert_bool_result(result, na=na, method_name="startswith")
161147

162148
def _str_endswith(
@@ -174,12 +160,6 @@ def _str_endswith(
174160

175161
for p in pat[1:]:
176162
result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
177-
if (
178-
self.dtype.na_value is libmissing.NA
179-
and na is not lib.no_default
180-
and not isna(na)
181-
): # pyright: ignore [reportGeneralTypeIssues]
182-
result = result.fill_null(na)
183163
return self._convert_bool_result(result, na=na, method_name="endswith")
184164

185165
def _str_isalnum(self):
@@ -234,12 +214,6 @@ def _str_contains(
234214
else:
235215
pa_contains = pc.match_substring
236216
result = pa_contains(self._pa_array, pat, ignore_case=not case)
237-
if (
238-
self.dtype.na_value is libmissing.NA
239-
and na is not lib.no_default
240-
and not isna(na)
241-
): # pyright: ignore [reportGeneralTypeIssues]
242-
result = result.fill_null(na)
243217
return self._convert_bool_result(result, na=na, method_name="contains")
244218

245219
def _str_find(self, sub: str, start: int = 0, end: int | None = None):

pandas/core/arrays/arrow/array.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2312,6 +2312,8 @@ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
23122312
]
23132313

23142314
def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
2315+
if na is not lib.no_default and not isna(na): # pyright: ignore [reportGeneralTypeIssues]
2316+
result = result.fill_null(na)
23152317
return type(self)(result)
23162318

23172319
def _convert_int_result(self, result):

pandas/core/arrays/string_arrow.py

Lines changed: 16 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -224,26 +224,26 @@ def insert(self, loc: int, item) -> ArrowStringArray:
224224
return super().insert(loc, item)
225225

226226
def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
227+
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
228+
# GH#59561
229+
warnings.warn(
230+
f"Allowing a non-bool 'na' in obj.str.{method_name} is deprecated "
231+
"and will raise in a future version.",
232+
FutureWarning,
233+
stacklevel=find_stack_level(),
234+
)
235+
na = bool(na)
236+
227237
if self.dtype.na_value is np.nan:
228-
na_value: bool | lib.NoDefault
229-
if na is lib.no_default:
230-
na_value = False
231-
elif isna(na):
238+
if na is lib.no_default or isna(na):
232239
# NaN propagates as False
233240
values = values.fill_null(False)
234-
na_value = lib.no_default
235241
else:
236-
if not isinstance(na, bool):
237-
# GH#59561
238-
warnings.warn(
239-
f"Allowing a non-bool 'na' in obj.str.{method_name} is "
240-
"deprecated and will raise in a future version.",
241-
FutureWarning,
242-
stacklevel=find_stack_level(),
243-
)
244-
values = values.fill_null(bool(na))
245-
na_value = lib.no_default
246-
return ArrowExtensionArray(values).to_numpy(na_value=na_value)
242+
values = values.fill_null(na)
243+
return values.to_numpy(zero_copy_only=False)
244+
else:
245+
if na is not lib.no_default and not isna(na): # pyright: ignore [reportGeneralTypeIssues]
246+
values = values.fill_null(na)
247247
return BooleanDtype().__from_arrow__(values)
248248

249249
def _maybe_convert_setitem_value(self, value):
@@ -325,21 +325,6 @@ def _str_contains(
325325
fallback_performancewarning()
326326
return super()._str_contains(pat, case, flags, na, regex)
327327

328-
if (
329-
self.dtype.na_value is libmissing.NA
330-
and na is not lib.no_default
331-
and not isna(na)
332-
):
333-
if not isinstance(na, bool):
334-
# GH#59561
335-
warnings.warn(
336-
"Allowing a non-bool 'na' in obj.str.contains is deprecated "
337-
"and will raise in a future version.",
338-
FutureWarning,
339-
stacklevel=find_stack_level(),
340-
)
341-
na = bool(na)
342-
343328
return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex)
344329

345330
def _str_replace(

pandas/tests/strings/test_find_replace.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -298,10 +298,7 @@ def test_startswith_endswith_validate_na(any_string_dtype):
298298
)
299299

300300
dtype = ser.dtype
301-
if (
302-
isinstance(dtype, pd.StringDtype)
303-
and (dtype.storage == "python" or dtype.na_value is np.nan)
304-
) or dtype == np.dtype("object"):
301+
if (isinstance(dtype, pd.StringDtype)) or dtype == np.dtype("object"):
305302
msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated"
306303
with tm.assert_produces_warning(FutureWarning, match=msg):
307304
ser.str.startswith("kapow", na="baz")

0 commit comments

Comments
 (0)