Skip to content

Commit 0b90518

Browse files
fix issue 24804
1 parent ca59a3b commit 0b90518

File tree

2 files changed

+54
-27
lines changed

2 files changed

+54
-27
lines changed

pandas/core/strings.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,7 @@ def str_endswith(arr, pat, na=np.nan):
425425
return _na_map(f, arr, na, dtype=bool)
426426

427427

428-
def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
428+
def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=None):
429429
r"""
430430
Replace occurrences of pattern/regex in the Series/Index with
431431
some other string. Equivalent to :meth:`str.replace` or
@@ -550,6 +550,12 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
550550
raise TypeError("repl must be a string or callable")
551551

552552
is_compiled_re = is_re(pat)
553+
if not is_compiled_re and regex is None:
554+
if len(pat) == 1 and re.findall("\W", pat):
555+
regex = False
556+
warnings.warn(f"{pat} is interpreted as a literal in default, "
557+
f"not regex. The default will change in the future",
558+
FutureWarning)
553559
if regex:
554560
if is_compiled_re:
555561
if (case is not None) or (flags != 0):
@@ -564,7 +570,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
564570
# add case flag, if provided
565571
if case is False:
566572
flags |= re.IGNORECASE
567-
if is_compiled_re or len(pat) > 1 or flags or callable(repl):
573+
if is_compiled_re or pat or flags or callable(repl):
568574
n = n if n >= 0 else 0
569575
compiled = re.compile(pat, flags=flags)
570576
f = lambda x: compiled.sub(repl=repl, string=x, count=n)
@@ -2529,7 +2535,7 @@ def match(self, pat, case=True, flags=0, na=np.nan):
25292535
return self._wrap_result(result, fill_value=na)
25302536

25312537
@copy(str_replace)
2532-
def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
2538+
def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):
25332539
result = str_replace(self._parent, pat, repl, n=n, case=case,
25342540
flags=flags, regex=regex)
25352541
return self._wrap_result(result)

pandas/tests/test_strings.py

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -967,38 +967,39 @@ def test_casemethods(self):
967967
def test_replace(self):
968968
values = Series(['fooBAD__barBAD', NA])
969969

970-
result = values.str.replace('BAD[_]*', '')
970+
result = values.str.replace('BAD[_]*', '', regex=True)
971971
exp = Series(['foobar', NA])
972972
tm.assert_series_equal(result, exp)
973973

974-
result = values.str.replace('BAD[_]*', '', n=1)
974+
result = values.str.replace('BAD[_]*', '', regex=True, n=1)
975975
exp = Series(['foobarBAD', NA])
976976
tm.assert_series_equal(result, exp)
977977

978978
# mixed
979979
mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD',
980980
None, 1, 2.])
981981

982-
rs = Series(mixed).str.replace('BAD[_]*', '')
982+
rs = Series(mixed).str.replace('BAD[_]*', '', regex=True)
983983
xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA])
984984
assert isinstance(rs, Series)
985985
tm.assert_almost_equal(rs, xp)
986986

987987
# unicode
988988
values = Series([u('fooBAD__barBAD'), NA])
989989

990-
result = values.str.replace('BAD[_]*', '')
990+
result = values.str.replace('BAD[_]*', '', regex=True)
991991
exp = Series([u('foobar'), NA])
992992
tm.assert_series_equal(result, exp)
993993

994-
result = values.str.replace('BAD[_]*', '', n=1)
994+
result = values.str.replace('BAD[_]*', '', n=1, regex=True)
995995
exp = Series([u('foobarBAD'), NA])
996996
tm.assert_series_equal(result, exp)
997997

998998
# flags + unicode
999999
values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
10001000
exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
1001-
result = values.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE)
1001+
result = values.str.replace(r"(?<=\w),(?=\w)", ", ", regex=True,
1002+
flags=re.UNICODE)
10021003
tm.assert_series_equal(result, exp)
10031004

10041005
# GH 13438
@@ -1014,7 +1015,7 @@ def test_replace_callable(self):
10141015

10151016
# test with callable
10161017
repl = lambda m: m.group(0).swapcase()
1017-
result = values.str.replace('[a-z][A-Z]{2}', repl, n=2)
1018+
result = values.str.replace('[a-z][A-Z]{2}', repl, n=2, regex=True)
10181019
exp = Series(['foObaD__baRbaD', NA])
10191020
tm.assert_series_equal(result, exp)
10201021

@@ -1027,21 +1028,21 @@ def test_replace_callable(self):
10271028

10281029
repl = lambda: None
10291030
with pytest.raises(TypeError, match=p_err):
1030-
values.str.replace('a', repl)
1031+
values.str.replace('a', repl, regex=True)
10311032

10321033
repl = lambda m, x: None
10331034
with pytest.raises(TypeError, match=p_err):
1034-
values.str.replace('a', repl)
1035+
values.str.replace('a', repl, regex=True)
10351036

10361037
repl = lambda m, x, y=None: None
10371038
with pytest.raises(TypeError, match=p_err):
1038-
values.str.replace('a', repl)
1039+
values.str.replace('a', repl, regex=True)
10391040

10401041
# test regex named groups
10411042
values = Series(['Foo Bar Baz', NA])
10421043
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
10431044
repl = lambda m: m.group('middle').swapcase()
1044-
result = values.str.replace(pat, repl)
1045+
result = values.str.replace(pat, repl, regex=True)
10451046
exp = Series(['bAR', NA])
10461047
tm.assert_series_equal(result, exp)
10471048

@@ -1051,35 +1052,35 @@ def test_replace_compiled_regex(self):
10511052

10521053
# test with compiled regex
10531054
pat = re.compile(r'BAD[_]*')
1054-
result = values.str.replace(pat, '')
1055+
result = values.str.replace(pat, '', regex=True)
10551056
exp = Series(['foobar', NA])
10561057
tm.assert_series_equal(result, exp)
10571058

10581059
# mixed
10591060
mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD',
10601061
None, 1, 2.])
10611062

1062-
rs = Series(mixed).str.replace(pat, '')
1063+
rs = Series(mixed).str.replace(pat, '', regex=True)
10631064
xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA])
10641065
assert isinstance(rs, Series)
10651066
tm.assert_almost_equal(rs, xp)
10661067

10671068
# unicode
10681069
values = Series([u('fooBAD__barBAD'), NA])
10691070

1070-
result = values.str.replace(pat, '')
1071+
result = values.str.replace(pat, '', regex=True)
10711072
exp = Series([u('foobar'), NA])
10721073
tm.assert_series_equal(result, exp)
10731074

1074-
result = values.str.replace(pat, '', n=1)
1075+
result = values.str.replace(pat, '', n=1, regex=True)
10751076
exp = Series([u('foobarBAD'), NA])
10761077
tm.assert_series_equal(result, exp)
10771078

10781079
# flags + unicode
10791080
values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
10801081
exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
10811082
pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
1082-
result = values.str.replace(pat, ", ")
1083+
result = values.str.replace(pat, ", ", regex=True)
10831084
tm.assert_series_equal(result, exp)
10841085

10851086
# case and flags provided to str.replace will have no effect
@@ -1089,29 +1090,30 @@ def test_replace_compiled_regex(self):
10891090

10901091
with pytest.raises(ValueError,
10911092
match="case and flags cannot be"):
1092-
result = values.str.replace(pat, '', flags=re.IGNORECASE)
1093+
result = values.str.replace(pat, '', flags=re.IGNORECASE,
1094+
regex=True)
10931095

10941096
with pytest.raises(ValueError,
10951097
match="case and flags cannot be"):
1096-
result = values.str.replace(pat, '', case=False)
1098+
result = values.str.replace(pat, '', case=False, regex=True)
10971099

10981100
with pytest.raises(ValueError,
10991101
match="case and flags cannot be"):
1100-
result = values.str.replace(pat, '', case=True)
1102+
result = values.str.replace(pat, '', case=True, regex=True)
11011103

11021104
# test with callable
11031105
values = Series(['fooBAD__barBAD', NA])
11041106
repl = lambda m: m.group(0).swapcase()
11051107
pat = re.compile('[a-z][A-Z]{2}')
1106-
result = values.str.replace(pat, repl, n=2)
1108+
result = values.str.replace(pat, repl, n=2, regex=True)
11071109
exp = Series(['foObaD__baRbaD', NA])
11081110
tm.assert_series_equal(result, exp)
11091111

11101112
def test_replace_literal(self):
11111113
# GH16808 literal replace (regex=False vs regex=True)
11121114
values = Series(['f.o', 'foo', NA])
11131115
exp = Series(['bao', 'bao', NA])
1114-
result = values.str.replace('f.', 'ba')
1116+
result = values.str.replace('f.', 'ba', regex=True)
11151117
tm.assert_series_equal(result, exp)
11161118

11171119
exp = Series(['bao', 'foo', NA])
@@ -3236,17 +3238,17 @@ def test_replace_moar(self):
32363238
s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA',
32373239
'dog', 'cat'])
32383240

3239-
result = s.str.replace('A', 'YYY')
3241+
result = s.str.replace('A', 'YYY', regex=True)
32403242
expected = Series(['YYY', 'B', 'C', 'YYYaba', 'Baca', '', NA,
32413243
'CYYYBYYY', 'dog', 'cat'])
32423244
assert_series_equal(result, expected)
32433245

3244-
result = s.str.replace('A', 'YYY', case=False)
3246+
result = s.str.replace('A', 'YYY', case=False, regex=True)
32453247
expected = Series(['YYY', 'B', 'C', 'YYYYYYbYYY', 'BYYYcYYY', '', NA,
32463248
'CYYYBYYY', 'dog', 'cYYYt'])
32473249
assert_series_equal(result, expected)
32483250

3249-
result = s.str.replace('^.a|dog', 'XX-XX ', case=False)
3251+
result = s.str.replace('^.a|dog', 'XX-XX ', case=False, regex=True)
32503252
expected = Series(['A', 'B', 'C', 'XX-XX ba', 'XX-XX ca', '', NA,
32513253
'XX-XX BA', 'XX-XX ', 'XX-XX t'])
32523254
assert_series_equal(result, expected)
@@ -3424,3 +3426,22 @@ def test_method_on_bytes(self):
34243426
expected = Series(np.array(
34253427
['ad', 'be', 'cf'], 'S2').astype(object))
34263428
tm.assert_series_equal(result, expected)
3429+
3430+
@pytest.mark.parametrize("regex, expected_array", [
3431+
(True, ['foofoofoo', 'foofoofoo']),
3432+
(False, ['abc', '123']),
3433+
(None, ['abc', '123'])
3434+
])
3435+
def test_replace_single_pattern(self, regex, expected_array):
3436+
values = Series(['abc', '123'])
3437+
# GH: 24804
3438+
result = values.str.replace('.', 'foo', regex=regex)
3439+
expected = Series(expected_array)
3440+
tm.assert_series_equal(result, expected)
3441+
3442+
def test_replace_without_specifying_regex_parameter(self):
3443+
values = Series(['a.c'])
3444+
# GH: 24804
3445+
result = values.str.replace('.', 'b')
3446+
expected = Series(['abc'])
3447+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)