Skip to content

Commit ee33c4e

Browse files
committed
Add test and fix whatsnew entry
1 parent e5ef14b commit ee33c4e

File tree

2 files changed

+18
-1
lines changed

2 files changed

+18
-1
lines changed

doc/source/whatsnew/v2.1.2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Bug fixes
3030
- Fixed bug in :meth:`Index.insert` raising when inserting ``None`` into :class:`Index` with ``dtype="string[pyarrow_numpy]"`` (:issue:`55365`)
3131
- Fixed bug in :meth:`Series.all` and :meth:`Series.any` not treating missing values correctly for ``dtype="string[pyarrow_numpy]"`` (:issue:`55367`)
3232
- Fixed bug in :meth:`Series.rank` for ``string[pyarrow_numpy]`` dtype (:issue:`55362`)
33-
- Fixed bug in :meth:`Series.str.extractall` for ``string[pyarrow]`` dtype being converted to object (:issue:`53846`)
33+
- Fixed bug in :meth:`Series.str.extractall` where :class:`ArrowDtype` dtype was being converted to object dtype (:issue:`53846`)
3434
- Silence ``Period[B]`` warnings introduced by :issue:`53446` during normal plotting activity (:issue:`55138`)
3535
-
3636

pandas/tests/strings/test_extract.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import re
33

44
import numpy as np
5+
import pyarrow as pa
56
import pytest
67

78
from pandas import (
@@ -11,6 +12,7 @@
1112
Series,
1213
_testing as tm,
1314
)
15+
from pandas.core.dtypes.dtypes import ArrowDtype
1416

1517

1618
def test_extract_expand_kwarg_wrong_type_raises(any_string_dtype):
@@ -706,3 +708,18 @@ def test_extractall_same_as_extract_subject_index(any_string_dtype):
706708
has_match_index = s.str.extractall(pattern_one_noname)
707709
no_match_index = has_match_index.xs(0, level="match")
708710
tm.assert_frame_equal(extract_one_noname, no_match_index)
711+
712+
713+
@pytest.mark.parametrize(
    "data, expected_dtype",
    [
        # Series backed by ArrowDtype(pa.string())
        (Series(["abc", "ab"], dtype=ArrowDtype(pa.string())), "string[pyarrow]"),
        # Series created with dtype="string"
        (Series(["abc", "ab"], dtype="string"), "string[python]"),
        # Series created without an explicit dtype
        (Series(["abc", "ab"]), "object"),
    ],
)
def test_extractall_preserves_dtype(data, expected_dtype):
    # Calling extractall on a Series with a specific dtype must yield a
    # DataFrame whose capture-group column compares equal to the expected
    # dtype for that input.
    extracted = data.str.extractall("(ab)")
    column_dtype = extracted.dtypes[0]
    assert column_dtype == expected_dtype

0 commit comments

Comments
 (0)