Skip to content

Commit 362d8f7

Browse files
TST (string dtype): resolve xfails for frame fillna and replace tests + fix bug in replace for string (pandas-dev#60295)
(cherry picked from commit fae3e80)
1 parent e37ffb3 commit 362d8f7

File tree

4 files changed: 64 additions and 50 deletions.

pandas/core/array_algos/replace.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -149,4 +149,6 @@ def re_replacer(s):
149149
if mask is None:
150150
values[:] = f(values)
151151
else:
152+
if values.ndim != mask.ndim:
153+
mask = np.broadcast_to(mask, values.shape)
152154
values[mask] = f(values[mask])

pandas/core/internals/blocks.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2139,6 +2139,13 @@ def where(
21392139
if isinstance(self.dtype, (IntervalDtype, StringDtype)):
21402140
# TestSetitemFloatIntervalWithIntIntervalValues
21412141
blk = self.coerce_to_target_dtype(orig_other)
2142+
if (
2143+
self.ndim == 2
2144+
and isinstance(orig_cond, np.ndarray)
2145+
and orig_cond.ndim == 1
2146+
and not is_1d_only_ea_dtype(blk.dtype)
2147+
):
2148+
orig_cond = orig_cond[:, None]
21422149
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
21432150
return self._maybe_downcast(
21442151
nbs, downcast=_downcast, using_cow=using_cow, caller="where"

pandas/tests/frame/methods/test_fillna.py

Lines changed: 12 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas.util._test_decorators as td
75

86
from pandas import (
@@ -91,8 +89,6 @@ def test_fillna_datetime(self, datetime_frame):
9189
with pytest.raises(ValueError, match=msg):
9290
datetime_frame.fillna(5, method="ffill")
9391

94-
# TODO(infer_string) test as actual error instead of xfail
95-
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
9692
def test_fillna_mixed_type(self, float_string_frame):
9793
mf = float_string_frame
9894
mf.loc[mf.index[5:20], "foo"] = np.nan
@@ -126,26 +122,24 @@ def test_fillna_empty(self, using_copy_on_write):
126122
df.x.fillna(method=m, inplace=True)
127123
df.x.fillna(method=m)
128124

129-
def test_fillna_different_dtype(self, using_infer_string):
125+
def test_fillna_different_dtype(self):
130126
# with different dtype (GH#3386)
131127
df = DataFrame(
132128
[["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
133129
)
134130

135-
if using_infer_string:
136-
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
137-
result = df.fillna({2: "foo"})
138-
else:
131+
msg = "Downcasting object dtype arrays"
132+
with tm.assert_produces_warning(FutureWarning, match=msg):
139133
result = df.fillna({2: "foo"})
140134
expected = DataFrame(
141135
[["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
142136
)
137+
# column is originally float (all-NaN) -> filling with string gives object dtype
138+
# expected[2] = expected[2].astype("object")
143139
tm.assert_frame_equal(result, expected)
144140

145-
if using_infer_string:
146-
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
147-
return_value = df.fillna({2: "foo"}, inplace=True)
148-
else:
141+
msg = "Downcasting object dtype arrays"
142+
with tm.assert_produces_warning(FutureWarning, match=msg):
149143
return_value = df.fillna({2: "foo"}, inplace=True)
150144
tm.assert_frame_equal(df, expected)
151145
assert return_value is None
@@ -390,6 +384,7 @@ def test_fillna_dtype_conversion(self, using_infer_string):
390384
result = df.fillna("nan")
391385
else:
392386
result = df.fillna("nan")
387+
# expected = DataFrame("nan", dtype="object", index=range(3),columns=["A", "B"])
393388
expected = DataFrame("nan", index=range(3), columns=["A", "B"])
394389
tm.assert_frame_equal(result, expected)
395390

@@ -665,18 +660,10 @@ def test_fillna_col_reordering(self):
665660
filled = df.fillna(method="ffill")
666661
assert df.columns.tolist() == filled.columns.tolist()
667662

668-
# TODO(infer_string) test as actual error instead of xfail
669-
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
670-
def test_fill_corner(self, float_frame, float_string_frame):
671-
mf = float_string_frame
672-
mf.loc[mf.index[5:20], "foo"] = np.nan
673-
mf.loc[mf.index[-10:], "A"] = np.nan
674-
675-
filled = float_string_frame.fillna(value=0)
676-
assert (filled.loc[filled.index[5:20], "foo"] == 0).all()
677-
del float_string_frame["foo"]
678-
679-
float_frame.reindex(columns=[]).fillna(value=0)
663+
def test_fill_empty(self, float_frame):
664+
df = float_frame.reindex(columns=[])
665+
result = df.fillna(value=0)
666+
tm.assert_frame_equal(result, df)
680667

681668
def test_fillna_downcast_dict(self):
682669
# GH#40809

pandas/tests/frame/methods/test_replace.py

Lines changed: 43 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
from pandas._config import using_string_dtype
10-
119
import pandas as pd
1210
from pandas import (
1311
DataFrame,
@@ -30,7 +28,6 @@ def mix_abc() -> dict[str, list[float | str]]:
3028

3129

3230
class TestDataFrameReplace:
33-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
3431
def test_replace_inplace(self, datetime_frame, float_string_frame):
3532
datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
3633
datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan
@@ -46,7 +43,9 @@ def test_replace_inplace(self, datetime_frame, float_string_frame):
4643
mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan
4744

4845
result = float_string_frame.replace(np.nan, 0)
49-
expected = float_string_frame.fillna(value=0)
46+
expected = float_string_frame.copy()
47+
expected["foo"] = expected["foo"].astype(object)
48+
expected = expected.fillna(value=0)
5049
tm.assert_frame_equal(result, expected)
5150

5251
tsframe = datetime_frame.copy()
@@ -298,20 +297,22 @@ def test_regex_replace_dict_nested_non_first_character(
298297
tm.assert_frame_equal(result, expected)
299298

300299
def test_regex_replace_dict_nested_gh4115(self):
301-
df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2})
302-
expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
300+
df = DataFrame(
301+
{"Type": Series(["Q", "T", "Q", "Q", "T"], dtype=object), "tmp": 2}
302+
)
303+
expected = DataFrame({"Type": Series([0, 1, 0, 0, 1], dtype=object), "tmp": 2})
303304
msg = "Downcasting behavior in `replace`"
304305
with tm.assert_produces_warning(FutureWarning, match=msg):
305306
result = df.replace({"Type": {"Q": 0, "T": 1}})
307+
306308
tm.assert_frame_equal(result, expected)
307309

308-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
309310
def test_regex_replace_list_to_scalar(self, mix_abc):
310311
df = DataFrame(mix_abc)
311312
expec = DataFrame(
312313
{
313314
"a": mix_abc["a"],
314-
"b": np.array([np.nan] * 4),
315+
"b": Series([np.nan] * 4, dtype="str"),
315316
"c": [np.nan, np.nan, np.nan, "d"],
316317
}
317318
)
@@ -334,7 +335,6 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
334335
tm.assert_frame_equal(res2, expec)
335336
tm.assert_frame_equal(res3, expec)
336337

337-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
338338
def test_regex_replace_str_to_numeric(self, mix_abc):
339339
# what happens when you try to replace a numeric value with a regex?
340340
df = DataFrame(mix_abc)
@@ -346,11 +346,12 @@ def test_regex_replace_str_to_numeric(self, mix_abc):
346346
return_value = res3.replace(regex=r"\s*\.\s*", value=0, inplace=True)
347347
assert return_value is None
348348
expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]})
349+
# TODO(infer_string)
350+
expec["c"] = expec["c"].astype(object)
349351
tm.assert_frame_equal(res, expec)
350352
tm.assert_frame_equal(res2, expec)
351353
tm.assert_frame_equal(res3, expec)
352354

353-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
354355
def test_regex_replace_regex_list_to_numeric(self, mix_abc):
355356
df = DataFrame(mix_abc)
356357
res = df.replace([r"\s*\.\s*", "b"], 0, regex=True)
@@ -566,21 +567,28 @@ def test_replace_convert(self):
566567
res = rep.dtypes
567568
tm.assert_series_equal(expec, res)
568569

569-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
570570
def test_replace_mixed(self, float_string_frame):
571571
mf = float_string_frame
572572
mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan
573573
mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan
574574

575575
result = float_string_frame.replace(np.nan, -18)
576-
expected = float_string_frame.fillna(value=-18)
576+
expected = float_string_frame.copy()
577+
expected["foo"] = expected["foo"].astype(object)
578+
expected = expected.fillna(value=-18)
577579
tm.assert_frame_equal(result, expected)
578-
tm.assert_frame_equal(result.replace(-18, np.nan), float_string_frame)
580+
expected2 = float_string_frame.copy()
581+
expected2["foo"] = expected2["foo"].astype(object)
582+
tm.assert_frame_equal(result.replace(-18, np.nan), expected2)
579583

580584
result = float_string_frame.replace(np.nan, -1e8)
581-
expected = float_string_frame.fillna(value=-1e8)
585+
expected = float_string_frame.copy()
586+
expected["foo"] = expected["foo"].astype(object)
587+
expected = expected.fillna(value=-1e8)
582588
tm.assert_frame_equal(result, expected)
583-
tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame)
589+
expected2 = float_string_frame.copy()
590+
expected2["foo"] = expected2["foo"].astype(object)
591+
tm.assert_frame_equal(result.replace(-1e8, np.nan), expected2)
584592

585593
def test_replace_mixed_int_block_upcasting(self):
586594
# int block upcasting
@@ -641,7 +649,7 @@ def test_replace_mixed2(self, using_infer_string):
641649

642650
expected = DataFrame(
643651
{
644-
"A": Series(["foo", "bar"]),
652+
"A": Series(["foo", "bar"], dtype="object"),
645653
"B": Series([0, "foo"], dtype="object"),
646654
}
647655
)
@@ -958,15 +966,16 @@ def test_replace_limit(self):
958966
# TODO
959967
pass
960968

961-
def test_replace_dict_no_regex(self):
969+
def test_replace_dict_no_regex(self, any_string_dtype):
962970
answer = Series(
963971
{
964972
0: "Strongly Agree",
965973
1: "Agree",
966974
2: "Neutral",
967975
3: "Disagree",
968976
4: "Strongly Disagree",
969-
}
977+
},
978+
dtype=any_string_dtype,
970979
)
971980
weights = {
972981
"Agree": 4,
@@ -981,15 +990,16 @@ def test_replace_dict_no_regex(self):
981990
result = answer.replace(weights)
982991
tm.assert_series_equal(result, expected)
983992

984-
def test_replace_series_no_regex(self):
993+
def test_replace_series_no_regex(self, any_string_dtype):
985994
answer = Series(
986995
{
987996
0: "Strongly Agree",
988997
1: "Agree",
989998
2: "Neutral",
990999
3: "Disagree",
9911000
4: "Strongly Disagree",
992-
}
1001+
},
1002+
dtype=any_string_dtype,
9931003
)
9941004
weights = Series(
9951005
{
@@ -1087,16 +1097,15 @@ def test_nested_dict_overlapping_keys_replace_str(self):
10871097
expected = df.replace({"a": dict(zip(astr, bstr))})
10881098
tm.assert_frame_equal(result, expected)
10891099

1090-
@pytest.mark.xfail(using_string_dtype(), reason="can't set float into string")
1091-
def test_replace_swapping_bug(self, using_infer_string):
1100+
def test_replace_swapping_bug(self):
10921101
df = DataFrame({"a": [True, False, True]})
10931102
res = df.replace({"a": {True: "Y", False: "N"}})
1094-
expect = DataFrame({"a": ["Y", "N", "Y"]})
1103+
expect = DataFrame({"a": ["Y", "N", "Y"]}, dtype=object)
10951104
tm.assert_frame_equal(res, expect)
10961105

10971106
df = DataFrame({"a": [0, 1, 0]})
10981107
res = df.replace({"a": {0: "Y", 1: "N"}})
1099-
expect = DataFrame({"a": ["Y", "N", "Y"]})
1108+
expect = DataFrame({"a": ["Y", "N", "Y"]}, dtype=object)
11001109
tm.assert_frame_equal(res, expect)
11011110

11021111
def test_replace_period(self):
@@ -1372,7 +1381,7 @@ def test_replace_commutative(self, df, to_replace, exp):
13721381
)
13731382
def test_replace_replacer_dtype(self, replacer):
13741383
# GH26632
1375-
df = DataFrame(["a"])
1384+
df = DataFrame(["a"], dtype=object)
13761385
msg = "Downcasting behavior in `replace` "
13771386
with tm.assert_produces_warning(FutureWarning, match=msg):
13781387
result = df.replace({"a": replacer, "b": replacer})
@@ -1489,6 +1498,7 @@ def test_replace_value_category_type(self):
14891498
input_df = input_df.replace("obj1", "obj9")
14901499
result = input_df.replace("cat2", "catX")
14911500

1501+
result = result.astype({"col1": "int64", "col3": "float64", "col5": "str"})
14921502
tm.assert_frame_equal(result, expected)
14931503

14941504
def test_replace_dict_category_type(self):
@@ -1650,6 +1660,14 @@ def test_replace_regex_dtype_frame(self, regex):
16501660
expected_df2 = DataFrame({"A": [1], "B": ["1"]})
16511661
with tm.assert_produces_warning(FutureWarning, match=msg):
16521662
result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
1663+
1664+
if regex:
1665+
# TODO(infer_string): both string columns get cast to object,
1666+
# while only needed for column A
1667+
expected_df2 = DataFrame({"A": [1], "B": ["1"]}, dtype=object)
1668+
else:
1669+
expected_df2 = DataFrame({"A": Series([1], dtype=object), "B": ["1"]})
1670+
result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
16531671
tm.assert_frame_equal(result_df2, expected_df2)
16541672

16551673
def test_replace_with_value_also_being_replaced(self):

0 commit comments

Comments
 (0)