6
6
import numpy as np
7
7
import pytest
8
8
9
- from pandas ._config import using_string_dtype
10
-
11
9
import pandas as pd
12
10
from pandas import (
13
11
DataFrame ,
@@ -30,7 +28,6 @@ def mix_abc() -> dict[str, list[float | str]]:
30
28
31
29
32
30
class TestDataFrameReplace :
33
- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
34
31
def test_replace_inplace (self , datetime_frame , float_string_frame ):
35
32
datetime_frame .loc [datetime_frame .index [:5 ], "A" ] = np .nan
36
33
datetime_frame .loc [datetime_frame .index [- 5 :], "A" ] = np .nan
@@ -46,7 +43,9 @@ def test_replace_inplace(self, datetime_frame, float_string_frame):
46
43
mf .iloc [- 10 :, mf .columns .get_loc ("A" )] = np .nan
47
44
48
45
result = float_string_frame .replace (np .nan , 0 )
49
- expected = float_string_frame .fillna (value = 0 )
46
+ expected = float_string_frame .copy ()
47
+ expected ["foo" ] = expected ["foo" ].astype (object )
48
+ expected = expected .fillna (value = 0 )
50
49
tm .assert_frame_equal (result , expected )
51
50
52
51
tsframe = datetime_frame .copy ()
@@ -298,20 +297,22 @@ def test_regex_replace_dict_nested_non_first_character(
298
297
tm .assert_frame_equal (result , expected )
299
298
300
299
def test_regex_replace_dict_nested_gh4115 (self ):
301
- df = DataFrame ({"Type" : ["Q" , "T" , "Q" , "Q" , "T" ], "tmp" : 2 })
302
- expected = DataFrame ({"Type" : [0 , 1 , 0 , 0 , 1 ], "tmp" : 2 })
300
+ df = DataFrame (
301
+ {"Type" : Series (["Q" , "T" , "Q" , "Q" , "T" ], dtype = object ), "tmp" : 2 }
302
+ )
303
+ expected = DataFrame ({"Type" : Series ([0 , 1 , 0 , 0 , 1 ], dtype = object ), "tmp" : 2 })
303
304
msg = "Downcasting behavior in `replace`"
304
305
with tm .assert_produces_warning (FutureWarning , match = msg ):
305
306
result = df .replace ({"Type" : {"Q" : 0 , "T" : 1 }})
307
+
306
308
tm .assert_frame_equal (result , expected )
307
309
308
- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
309
310
def test_regex_replace_list_to_scalar (self , mix_abc ):
310
311
df = DataFrame (mix_abc )
311
312
expec = DataFrame (
312
313
{
313
314
"a" : mix_abc ["a" ],
314
- "b" : np . array ([np .nan ] * 4 ),
315
+ "b" : Series ([np .nan ] * 4 , dtype = "str" ),
315
316
"c" : [np .nan , np .nan , np .nan , "d" ],
316
317
}
317
318
)
@@ -334,7 +335,6 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
334
335
tm .assert_frame_equal (res2 , expec )
335
336
tm .assert_frame_equal (res3 , expec )
336
337
337
- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
338
338
def test_regex_replace_str_to_numeric (self , mix_abc ):
339
339
# what happens when you try to replace a numeric value with a regex?
340
340
df = DataFrame (mix_abc )
@@ -346,11 +346,12 @@ def test_regex_replace_str_to_numeric(self, mix_abc):
346
346
return_value = res3 .replace (regex = r"\s*\.\s*" , value = 0 , inplace = True )
347
347
assert return_value is None
348
348
expec = DataFrame ({"a" : mix_abc ["a" ], "b" : ["a" , "b" , 0 , 0 ], "c" : mix_abc ["c" ]})
349
+ # TODO(infer_string)
350
+ expec ["c" ] = expec ["c" ].astype (object )
349
351
tm .assert_frame_equal (res , expec )
350
352
tm .assert_frame_equal (res2 , expec )
351
353
tm .assert_frame_equal (res3 , expec )
352
354
353
- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
354
355
def test_regex_replace_regex_list_to_numeric (self , mix_abc ):
355
356
df = DataFrame (mix_abc )
356
357
res = df .replace ([r"\s*\.\s*" , "b" ], 0 , regex = True )
@@ -566,21 +567,28 @@ def test_replace_convert(self):
566
567
res = rep .dtypes
567
568
tm .assert_series_equal (expec , res )
568
569
569
- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
570
570
def test_replace_mixed (self , float_string_frame ):
571
571
mf = float_string_frame
572
572
mf .iloc [5 :20 , mf .columns .get_loc ("foo" )] = np .nan
573
573
mf .iloc [- 10 :, mf .columns .get_loc ("A" )] = np .nan
574
574
575
575
result = float_string_frame .replace (np .nan , - 18 )
576
- expected = float_string_frame .fillna (value = - 18 )
576
+ expected = float_string_frame .copy ()
577
+ expected ["foo" ] = expected ["foo" ].astype (object )
578
+ expected = expected .fillna (value = - 18 )
577
579
tm .assert_frame_equal (result , expected )
578
- tm .assert_frame_equal (result .replace (- 18 , np .nan ), float_string_frame )
580
+ expected2 = float_string_frame .copy ()
581
+ expected2 ["foo" ] = expected2 ["foo" ].astype (object )
582
+ tm .assert_frame_equal (result .replace (- 18 , np .nan ), expected2 )
579
583
580
584
result = float_string_frame .replace (np .nan , - 1e8 )
581
- expected = float_string_frame .fillna (value = - 1e8 )
585
+ expected = float_string_frame .copy ()
586
+ expected ["foo" ] = expected ["foo" ].astype (object )
587
+ expected = expected .fillna (value = - 1e8 )
582
588
tm .assert_frame_equal (result , expected )
583
- tm .assert_frame_equal (result .replace (- 1e8 , np .nan ), float_string_frame )
589
+ expected2 = float_string_frame .copy ()
590
+ expected2 ["foo" ] = expected2 ["foo" ].astype (object )
591
+ tm .assert_frame_equal (result .replace (- 1e8 , np .nan ), expected2 )
584
592
585
593
def test_replace_mixed_int_block_upcasting (self ):
586
594
# int block upcasting
@@ -641,7 +649,7 @@ def test_replace_mixed2(self, using_infer_string):
641
649
642
650
expected = DataFrame (
643
651
{
644
- "A" : Series (["foo" , "bar" ]),
652
+ "A" : Series (["foo" , "bar" ], dtype = "object" ),
645
653
"B" : Series ([0 , "foo" ], dtype = "object" ),
646
654
}
647
655
)
@@ -958,15 +966,16 @@ def test_replace_limit(self):
958
966
# TODO
959
967
pass
960
968
961
- def test_replace_dict_no_regex (self ):
969
+ def test_replace_dict_no_regex (self , any_string_dtype ):
962
970
answer = Series (
963
971
{
964
972
0 : "Strongly Agree" ,
965
973
1 : "Agree" ,
966
974
2 : "Neutral" ,
967
975
3 : "Disagree" ,
968
976
4 : "Strongly Disagree" ,
969
- }
977
+ },
978
+ dtype = any_string_dtype ,
970
979
)
971
980
weights = {
972
981
"Agree" : 4 ,
@@ -981,15 +990,16 @@ def test_replace_dict_no_regex(self):
981
990
result = answer .replace (weights )
982
991
tm .assert_series_equal (result , expected )
983
992
984
- def test_replace_series_no_regex (self ):
993
+ def test_replace_series_no_regex (self , any_string_dtype ):
985
994
answer = Series (
986
995
{
987
996
0 : "Strongly Agree" ,
988
997
1 : "Agree" ,
989
998
2 : "Neutral" ,
990
999
3 : "Disagree" ,
991
1000
4 : "Strongly Disagree" ,
992
- }
1001
+ },
1002
+ dtype = any_string_dtype ,
993
1003
)
994
1004
weights = Series (
995
1005
{
@@ -1087,16 +1097,15 @@ def test_nested_dict_overlapping_keys_replace_str(self):
1087
1097
expected = df .replace ({"a" : dict (zip (astr , bstr ))})
1088
1098
tm .assert_frame_equal (result , expected )
1089
1099
1090
- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
1091
- def test_replace_swapping_bug (self , using_infer_string ):
1100
+ def test_replace_swapping_bug (self ):
1092
1101
df = DataFrame ({"a" : [True , False , True ]})
1093
1102
res = df .replace ({"a" : {True : "Y" , False : "N" }})
1094
- expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]})
1103
+ expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]}, dtype = object )
1095
1104
tm .assert_frame_equal (res , expect )
1096
1105
1097
1106
df = DataFrame ({"a" : [0 , 1 , 0 ]})
1098
1107
res = df .replace ({"a" : {0 : "Y" , 1 : "N" }})
1099
- expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]})
1108
+ expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]}, dtype = object )
1100
1109
tm .assert_frame_equal (res , expect )
1101
1110
1102
1111
def test_replace_period (self ):
@@ -1372,7 +1381,7 @@ def test_replace_commutative(self, df, to_replace, exp):
1372
1381
)
1373
1382
def test_replace_replacer_dtype (self , replacer ):
1374
1383
# GH26632
1375
- df = DataFrame (["a" ])
1384
+ df = DataFrame (["a" ], dtype = object )
1376
1385
msg = "Downcasting behavior in `replace` "
1377
1386
with tm .assert_produces_warning (FutureWarning , match = msg ):
1378
1387
result = df .replace ({"a" : replacer , "b" : replacer })
@@ -1489,6 +1498,7 @@ def test_replace_value_category_type(self):
1489
1498
input_df = input_df .replace ("obj1" , "obj9" )
1490
1499
result = input_df .replace ("cat2" , "catX" )
1491
1500
1501
+ result = result .astype ({"col1" : "int64" , "col3" : "float64" , "col5" : "str" })
1492
1502
tm .assert_frame_equal (result , expected )
1493
1503
1494
1504
def test_replace_dict_category_type (self ):
@@ -1650,6 +1660,14 @@ def test_replace_regex_dtype_frame(self, regex):
1650
1660
expected_df2 = DataFrame ({"A" : [1 ], "B" : ["1" ]})
1651
1661
with tm .assert_produces_warning (FutureWarning , match = msg ):
1652
1662
result_df2 = df2 .replace (to_replace = "0" , value = 1 , regex = regex )
1663
+
1664
+ if regex :
1665
+ # TODO(infer_string): both string columns get cast to object,
1666
+ # while only needed for column A
1667
+ expected_df2 = DataFrame ({"A" : [1 ], "B" : ["1" ]}, dtype = object )
1668
+ else :
1669
+ expected_df2 = DataFrame ({"A" : Series ([1 ], dtype = object ), "B" : ["1" ]})
1670
+ result_df2 = df2 .replace (to_replace = "0" , value = 1 , regex = regex )
1653
1671
tm .assert_frame_equal (result_df2 , expected_df2 )
1654
1672
1655
1673
def test_replace_with_value_also_being_replaced (self ):
0 commit comments