Skip to content

Commit d7f9a5d

Browse files
committed
REF: move implementation to ArrowStringArrayMixin
1 parent ea6cb90 commit d7f9a5d

File tree

3 files changed

+24
-24
lines changed

3 files changed

+24
-24
lines changed

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,26 @@ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
138138
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
139139
result = result.fill_null(na)
140140
return self._convert_bool_result(result)
141+
142+
def _str_find(self, sub: str, start: int = 0, end: int | None = None):
    """
    Locate ``sub`` inside each element of the backing pyarrow array.

    Parameters
    ----------
    sub : str
        Substring to search for.
    start : int, default 0
        Left edge of the search window; ``None`` is treated like ``0``.
    end : int or None, default None
        Right edge of the search window; ``None`` leaves it unbounded.

    Returns
    -------
    Whatever ``self._convert_int_result`` produces from the computed
    positions; ``-1`` is the marker used for "not found".
    """
    search_everything = end is None and (start is None or start == 0)
    if search_everything:
        # No window requested: a single kernel call covers the whole string.
        return self._convert_int_result(pc.find_substring(self._pa_array, sub))

    if sub == "":
        # GH#56792
        # Empty pattern combined with a window: defer to Python's own
        # ``str.find`` on every element instead of the slice-based path.
        per_chunk = self._apply_elementwise(lambda val: val.find(sub, start, end))
        return self._convert_int_result(pa.chunked_array(per_chunk))

    # ``shift`` converts a position found inside the slice back into a
    # position relative to the start of the unsliced string.
    if start is None:
        start = 0
        shift = 0
    elif start >= 0:
        shift = start
    else:
        # Negative start counts from the end of each string; clamp at 0 so
        # the shift never becomes negative.
        from_left = pc.add(start, pc.utf8_length(self._pa_array))
        shift = pc.if_else(pc.less(from_left, 0), 0, from_left)

    window = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
    positions = pc.find_substring(window, sub)

    # Only shift genuine hits; the -1 "not found" marker passes through.
    not_found_marker = pa.scalar(-1, type=positions.type)
    hit = pc.not_equal(positions, not_found_marker)
    shifted = pc.add(positions, shift)
    return self._convert_int_result(pc.if_else(hit, shifted, -1))

pandas/core/arrays/arrow/array.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2388,29 +2388,6 @@ def _str_fullmatch(
23882388
pat = f"{pat}$"
23892389
return self._str_match(pat, case, flags, na)
23902390

2391-
def _str_find(self, sub: str, start: int = 0, end: int | None = None) -> Self:
    """
    Find ``sub`` in every element of ``self._pa_array``.

    ``start`` and ``end`` bound the search window (``None`` for ``start``
    behaves like ``0``; ``None`` for ``end`` means no upper bound).
    Positions are passed through ``self._convert_int_result`` before being
    returned, and ``-1`` marks elements where nothing was found.
    """
    unbounded = end is None and (start is None or start == 0)
    if not unbounded:
        if sub == "":
            # GH 56792
            # Windowed search for the empty string: use ``str.find`` per
            # element rather than slicing.
            found_per_chunk = self._apply_elementwise(
                lambda val: val.find(sub, start, end)
            )
            return self._convert_int_result(pa.chunked_array(found_per_chunk))

        # Work out how far slice-relative hits must be moved to become
        # positions in the unsliced string.
        if start is None:
            base = 0
            start = 0
        elif start < 0:
            # Negative start counts back from each string's length, with a
            # floor of 0.
            base = pc.add(start, pc.utf8_length(self._pa_array))
            base = pc.if_else(pc.less(base, 0), 0, base)
        else:
            base = start

        sliced = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
        raw = pc.find_substring(sliced, sub)
        was_found = pc.not_equal(raw, pa.scalar(-1, type=raw.type))
        # Keep -1 for misses; add the base only to real matches.
        result = pc.if_else(was_found, pc.add(raw, base), -1)
    else:
        result = pc.find_substring(self._pa_array, sub)
    return self._convert_int_result(result)
2413-
24142391
def _str_join(self, sep: str) -> Self:
24152392
if pa.types.is_string(self._pa_array.type) or pa.types.is_large_string(
24162393
self._pa_array.type

pandas/core/arrays/string_arrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
445445
):
446446
# https://github.com/pandas-dev/pandas/pull/59562/files#r1725688888
447447
return super()._str_find(sub, start, end)
448-
return ArrowExtensionArray._str_find(self, sub, start, end)
448+
return ArrowStringArrayMixin._str_find(self, sub, start, end)
449449

450450
def _str_get_dummies(self, sep: str = "|"):
451451
dummies_pa, labels = ArrowExtensionArray(self._pa_array)._str_get_dummies(sep)

0 commit comments

Comments
 (0)