Skip to content

Commit 377ff3a

Browse files
follow same behaviour for categorical[str]
1 parent b650064 commit 377ff3a

File tree

3 files changed

+14
-5
lines changed

3 files changed

+14
-5
lines changed

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def _str_contains(
223223
pat,
224224
case: bool = True,
225225
flags: int = 0,
226-
na=lib.no_default,
226+
na: Scalar | lib.NoDefault = lib.no_default,
227227
regex: bool = True,
228228
):
229229
if flags:

pandas/core/arrays/categorical.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2669,16 +2669,24 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
26692669
# ------------------------------------------------------------------------
26702670
# String methods interface
26712671
def _str_map(
2672-
self, f, na_value=np.nan, dtype=np.dtype("object"), convert: bool = True
2672+
self, f, na_value=lib.no_default, dtype=np.dtype("object"), convert: bool = True
26732673
):
26742674
# Optimization to apply the callable `f` to the categories once
26752675
# and rebuild the result by `take`ing from the result with the codes.
26762676
# Returns the same type as the object-dtype implementation though.
2677-
from pandas.core.arrays import NumpyExtensionArray
2678-
26792677
categories = self.categories
26802678
codes = self.codes
2681-
result = NumpyExtensionArray(categories.to_numpy())._str_map(f, na_value, dtype)
2679+
if categories.dtype == "string":
2680+
result = categories.array._str_map(f, na_value, dtype)
2681+
if categories.dtype.na_value is np.nan:
2682+
# NaN propagates as False
2683+
na_value = False
2684+
else:
2685+
from pandas.core.arrays import NumpyExtensionArray
2686+
2687+
result = NumpyExtensionArray(categories.to_numpy())._str_map(
2688+
f, na_value, dtype
2689+
)
26822690
return take_nd(result, codes, fill_value=na_value)
26832691

26842692
def _str_get_dummies(self, sep: str = "|"):

pandas/core/arrays/string_.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ def _str_map_nan_semantics(
440440
dtype = self.dtype
441441
if na_value is lib.no_default:
442442
if is_bool_dtype(dtype):
443+
# NaN propagates as False
443444
na_value = False
444445
else:
445446
na_value = self.dtype.na_value

0 commit comments

Comments
 (0)