@@ -266,144 +266,157 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na):
266
266
tm .assert_series_equal (result , exp )
267
267
268
268
269
- def test_replace ():
270
- values = Series (["fooBAD__barBAD" , np .nan ])
269
+ def test_replace (any_string_dtype ):
270
+ values = Series (["fooBAD__barBAD" , np .nan ], dtype = any_string_dtype )
271
271
272
272
result = values .str .replace ("BAD[_]*" , "" , regex = True )
273
- exp = Series (["foobar" , np .nan ])
274
- tm .assert_series_equal (result , exp )
273
+ expected = Series (["foobar" , np .nan ], dtype = any_string_dtype )
274
+ tm .assert_series_equal (result , expected )
275
275
276
276
result = values .str .replace ("BAD[_]*" , "" , n = 1 , regex = True )
277
- exp = Series (["foobarBAD" , np .nan ])
278
- tm .assert_series_equal (result , exp )
277
+ expected = Series (["foobarBAD" , np .nan ], dtype = any_string_dtype )
278
+ tm .assert_series_equal (result , expected )
279
279
280
- # mixed
280
+
281
+ def test_replace_mixed_object ():
281
282
mixed = Series (
282
283
["aBAD" , np .nan , "bBAD" , True , datetime .today (), "fooBAD" , None , 1 , 2.0 ]
283
284
)
284
285
285
- rs = Series (mixed ).str .replace ("BAD[_]*" , "" , regex = True )
286
- xp = Series (["a" , np .nan , "b" , np .nan , np .nan , "foo" , np .nan , np .nan , np .nan ])
287
- assert isinstance (rs , Series )
288
- tm .assert_almost_equal (rs , xp )
286
+ result = Series (mixed ).str .replace ("BAD[_]*" , "" , regex = True )
287
+ expected = Series (["a" , np .nan , "b" , np .nan , np .nan , "foo" , np .nan , np .nan , np .nan ])
288
+ assert isinstance (result , Series )
289
+ tm .assert_almost_equal (result , expected )
289
290
290
- # flags + unicode
291
- values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )])
292
- exp = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )])
291
+
292
+ def test_replace_unicode (any_string_dtype ):
293
+ values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
294
+ expected = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
293
295
result = values .str .replace (r"(?<=\w),(?=\w)" , ", " , flags = re .UNICODE , regex = True )
294
- tm .assert_series_equal (result , exp )
296
+ tm .assert_series_equal (result , expected )
297
+
295
298
296
- # GH 13438
299
+ @pytest .mark .parametrize ("klass" , [Series , Index ])
300
+ @pytest .mark .parametrize ("repl" , [None , 3 , {"a" : "b" }])
301
+ @pytest .mark .parametrize ("data" , [["a" , "b" , None ], ["a" , "b" , "c" , "ad" ]])
302
+ def test_replace_raises (any_string_dtype , klass , repl , data ):
303
+ # https://github.com/pandas-dev/pandas/issues/13438
297
304
msg = "repl must be a string or callable"
298
- for klass in (Series , Index ):
299
- for repl in (None , 3 , {"a" : "b" }):
300
- for data in (["a" , "b" , None ], ["a" , "b" , "c" , "ad" ]):
301
- values = klass (data )
302
- with pytest .raises (TypeError , match = msg ):
303
- values .str .replace ("a" , repl )
305
+ values = klass (data , dtype = any_string_dtype )
306
+ with pytest .raises (TypeError , match = msg ):
307
+ values .str .replace ("a" , repl )
304
308
305
309
306
- def test_replace_callable ():
310
+ def test_replace_callable (any_string_dtype ):
307
311
# GH 15055
308
- values = Series (["fooBAD__barBAD" , np .nan ])
312
+ values = Series (["fooBAD__barBAD" , np .nan ], dtype = any_string_dtype )
309
313
310
314
# test with callable
311
315
repl = lambda m : m .group (0 ).swapcase ()
312
316
result = values .str .replace ("[a-z][A-Z]{2}" , repl , n = 2 , regex = True )
313
- exp = Series (["foObaD__baRbaD" , np .nan ])
314
- tm .assert_series_equal (result , exp )
317
+ expected = Series (["foObaD__baRbaD" , np .nan ], dtype = any_string_dtype )
318
+ tm .assert_series_equal (result , expected )
319
+
320
+
321
+ @pytest .mark .parametrize (
322
+ "repl" , [lambda : None , lambda m , x : None , lambda m , x , y = None : None ]
323
+ )
324
+ def test_replace_callable_raises (any_string_dtype , repl ):
325
+ # GH 15055
326
+ values = Series (["fooBAD__barBAD" , np .nan ], dtype = any_string_dtype )
315
327
316
328
# test with wrong number of arguments, raising an error
317
- p_err = (
329
+ msg = (
318
330
r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
319
331
r"(?(3)required )positional arguments?"
320
332
)
321
-
322
- repl = lambda : None
323
- with pytest .raises (TypeError , match = p_err ):
333
+ with pytest .raises (TypeError , match = msg ):
324
334
values .str .replace ("a" , repl )
325
335
326
- repl = lambda m , x : None
327
- with pytest .raises (TypeError , match = p_err ):
328
- values .str .replace ("a" , repl )
329
-
330
- repl = lambda m , x , y = None : None
331
- with pytest .raises (TypeError , match = p_err ):
332
- values .str .replace ("a" , repl )
333
336
337
+ def test_replace_callable_named_groups (any_string_dtype ):
334
338
# test regex named groups
335
- values = Series (["Foo Bar Baz" , np .nan ])
339
+ values = Series (["Foo Bar Baz" , np .nan ], dtype = any_string_dtype )
336
340
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
337
341
repl = lambda m : m .group ("middle" ).swapcase ()
338
342
result = values .str .replace (pat , repl , regex = True )
339
- exp = Series (["bAR" , np .nan ])
340
- tm .assert_series_equal (result , exp )
343
+ expected = Series (["bAR" , np .nan ], dtype = any_string_dtype )
344
+ tm .assert_series_equal (result , expected )
341
345
342
346
343
- def test_replace_compiled_regex ():
347
+ def test_replace_compiled_regex (any_string_dtype ):
344
348
# GH 15446
345
- values = Series (["fooBAD__barBAD" , np .nan ])
349
+ values = Series (["fooBAD__barBAD" , np .nan ], dtype = any_string_dtype )
346
350
347
351
# test with compiled regex
348
352
pat = re .compile (r"BAD_*" )
349
353
result = values .str .replace (pat , "" , regex = True )
350
- exp = Series (["foobar" , np .nan ])
351
- tm .assert_series_equal (result , exp )
354
+ expected = Series (["foobar" , np .nan ], dtype = any_string_dtype )
355
+ tm .assert_series_equal (result , expected )
352
356
353
357
result = values .str .replace (pat , "" , n = 1 , regex = True )
354
- exp = Series (["foobarBAD" , np .nan ])
355
- tm .assert_series_equal (result , exp )
358
+ expected = Series (["foobarBAD" , np .nan ], dtype = any_string_dtype )
359
+ tm .assert_series_equal (result , expected )
356
360
357
- # mixed
361
+
362
+ def test_replace_compiled_regex_mixed_object ():
363
+ pat = re .compile (r"BAD_*" )
358
364
mixed = Series (
359
365
["aBAD" , np .nan , "bBAD" , True , datetime .today (), "fooBAD" , None , 1 , 2.0 ]
360
366
)
361
367
362
- rs = Series (mixed ).str .replace (pat , "" , regex = True )
363
- xp = Series (["a" , np .nan , "b" , np .nan , np .nan , "foo" , np .nan , np .nan , np .nan ])
364
- assert isinstance (rs , Series )
365
- tm .assert_almost_equal (rs , xp )
368
+ result = Series (mixed ).str .replace (pat , "" , regex = True )
369
+ expected = Series (["a" , np .nan , "b" , np .nan , np .nan , "foo" , np .nan , np .nan , np .nan ])
370
+ assert isinstance (result , Series )
371
+ tm .assert_almost_equal (result , expected )
372
+
366
373
367
- # flags + unicode
368
- values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )])
369
- exp = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )])
374
+ def test_replace_compiled_regex_unicode ( any_string_dtype ):
375
+ values = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
376
+ expected = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
370
377
pat = re .compile (r"(?<=\w),(?=\w)" , flags = re .UNICODE )
371
378
result = values .str .replace (pat , ", " )
372
- tm .assert_series_equal (result , exp )
379
+ tm .assert_series_equal (result , expected )
373
380
381
+
382
+ def test_replace_compiled_regex_raises (any_string_dtype ):
374
383
# case and flags provided to str.replace will have no effect
375
384
# and will produce warnings
376
- values = Series (["fooBAD__barBAD__bad" , np .nan ])
385
+ values = Series (["fooBAD__barBAD__bad" , np .nan ], dtype = any_string_dtype )
377
386
pat = re .compile (r"BAD_*" )
378
387
379
- with pytest .raises (ValueError , match = "case and flags cannot be" ):
380
- result = values .str .replace (pat , "" , flags = re .IGNORECASE )
388
+ msg = "case and flags cannot be set when pat is a compiled regex"
381
389
382
- with pytest .raises (ValueError , match = "case and flags cannot be" ):
383
- result = values .str .replace (pat , "" , case = False )
390
+ with pytest .raises (ValueError , match = msg ):
391
+ values .str .replace (pat , "" , flags = re . IGNORECASE )
384
392
385
- with pytest .raises (ValueError , match = "case and flags cannot be" ):
386
- result = values .str .replace (pat , "" , case = True )
393
+ with pytest .raises (ValueError , match = msg ):
394
+ values .str .replace (pat , "" , case = False )
387
395
396
+ with pytest .raises (ValueError , match = msg ):
397
+ values .str .replace (pat , "" , case = True )
398
+
399
+
400
+ def test_replace_compiled_regex_callable (any_string_dtype ):
388
401
# test with callable
389
- values = Series (["fooBAD__barBAD" , np .nan ])
402
+ values = Series (["fooBAD__barBAD" , np .nan ], dtype = any_string_dtype )
390
403
repl = lambda m : m .group (0 ).swapcase ()
391
404
pat = re .compile ("[a-z][A-Z]{2}" )
392
405
result = values .str .replace (pat , repl , n = 2 )
393
- exp = Series (["foObaD__baRbaD" , np .nan ])
394
- tm .assert_series_equal (result , exp )
406
+ expected = Series (["foObaD__baRbaD" , np .nan ], dtype = any_string_dtype )
407
+ tm .assert_series_equal (result , expected )
395
408
396
409
397
- def test_replace_literal ():
410
+ def test_replace_literal (any_string_dtype ):
398
411
# GH16808 literal replace (regex=False vs regex=True)
399
- values = Series (["f.o" , "foo" , np .nan ])
400
- exp = Series (["bao" , "bao" , np .nan ])
412
+ values = Series (["f.o" , "foo" , np .nan ], dtype = any_string_dtype )
413
+ expected = Series (["bao" , "bao" , np .nan ], dtype = any_string_dtype )
401
414
result = values .str .replace ("f." , "ba" , regex = True )
402
- tm .assert_series_equal (result , exp )
415
+ tm .assert_series_equal (result , expected )
403
416
404
- exp = Series (["bao" , "foo" , np .nan ])
417
+ expected = Series (["bao" , "foo" , np .nan ], dtype = any_string_dtype )
405
418
result = values .str .replace ("f." , "ba" , regex = False )
406
- tm .assert_series_equal (result , exp )
419
+ tm .assert_series_equal (result , expected )
407
420
408
421
# Cannot do a literal replace if given a callable repl or compiled
409
422
# pattern
@@ -680,13 +693,17 @@ def test_contains_nan(any_string_dtype):
680
693
tm .assert_series_equal (result , expected )
681
694
682
695
683
- def test_replace_moar ():
696
+ def test_replace_moar (any_string_dtype ):
684
697
# PR #1179
685
- s = Series (["A" , "B" , "C" , "Aaba" , "Baca" , "" , np .nan , "CABA" , "dog" , "cat" ])
698
+ s = Series (
699
+ ["A" , "B" , "C" , "Aaba" , "Baca" , "" , np .nan , "CABA" , "dog" , "cat" ],
700
+ dtype = any_string_dtype ,
701
+ )
686
702
687
703
result = s .str .replace ("A" , "YYY" )
688
704
expected = Series (
689
- ["YYY" , "B" , "C" , "YYYaba" , "Baca" , "" , np .nan , "CYYYBYYY" , "dog" , "cat" ]
705
+ ["YYY" , "B" , "C" , "YYYaba" , "Baca" , "" , np .nan , "CYYYBYYY" , "dog" , "cat" ],
706
+ dtype = any_string_dtype ,
690
707
)
691
708
tm .assert_series_equal (result , expected )
692
709
@@ -703,7 +720,8 @@ def test_replace_moar():
703
720
"CYYYBYYY" ,
704
721
"dog" ,
705
722
"cYYYt" ,
706
- ]
723
+ ],
724
+ dtype = any_string_dtype ,
707
725
)
708
726
tm .assert_series_equal (result , expected )
709
727
@@ -720,7 +738,8 @@ def test_replace_moar():
720
738
"XX-XX BA" ,
721
739
"XX-XX " ,
722
740
"XX-XX t" ,
723
- ]
741
+ ],
742
+ dtype = any_string_dtype ,
724
743
)
725
744
tm .assert_series_equal (result , expected )
726
745
0 commit comments