Skip to content

Commit 0cb6125

Browse files
committed
Add type check for suffixes field in pd.merge to restrict types to tuple and list
1 parent e41ee47 commit 0cb6125

File tree

2 files changed

+29
-12
lines changed

2 files changed

+29
-12
lines changed

pandas/core/reshape/merge.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -667,10 +667,8 @@ def get_result(self):
667667

668668
join_index, left_indexer, right_indexer = self._get_join_info()
669669

670-
lsuf, rsuf = self.suffixes
671-
672670
llabels, rlabels = _items_overlap_with_suffix(
673-
self.left._info_axis, lsuf, self.right._info_axis, rsuf
671+
self.left._info_axis, self.right._info_axis, self.suffixes
674672
)
675673

676674
lindexers = {1: left_indexer} if left_indexer is not None else {}
@@ -1484,10 +1482,8 @@ def __init__(
14841482
def get_result(self):
14851483
join_index, left_indexer, right_indexer = self._get_join_info()
14861484

1487-
lsuf, rsuf = self.suffixes
1488-
14891485
llabels, rlabels = _items_overlap_with_suffix(
1490-
self.left._info_axis, lsuf, self.right._info_axis, rsuf
1486+
self.left._info_axis, self.right._info_axis, self.suffixes
14911487
)
14921488

14931489
if self.fill_method == "ffill":
@@ -2067,17 +2063,26 @@ def _validate_operand(obj: FrameOrSeries) -> "DataFrame":
20672063
)
20682064

20692065

2070-
def _items_overlap_with_suffix(left: Index, lsuffix, right: Index, rsuffix):
2066+
def _items_overlap_with_suffix(left: Index, right: Index, suffixes):
20712067
"""
2068+
Suffixes type validation.
2069+
20722070
If two indices overlap, add suffixes to overlapping entries.
20732071
20742072
If corresponding suffix is empty, the entry is simply converted to string.
20752073
20762074
"""
2075+
if not isinstance(suffixes, (tuple, list)):
2076+
raise TypeError(
2077+
f"suffixes should be of type list/tuple. But got {type(suffixes)}"
2078+
)
2079+
20772080
to_rename = left.intersection(right)
20782081
if len(to_rename) == 0:
20792082
return left, right
20802083

2084+
lsuffix, rsuffix = suffixes
2085+
20812086
if not lsuffix and not rsuffix:
20822087
raise ValueError(f"columns overlap but no suffix specified: {to_rename}")
20832088

pandas/tests/reshape/merge/test_merge.py

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2075,18 +2075,30 @@ def test_merge_suffix_error(col1, col2, suffixes):
20752075
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
20762076

20772077

2078-
@pytest.mark.parametrize("col1, col2, suffixes", [("a", "a", None), (0, 0, None)])
2079-
def test_merge_suffix_none_error(col1, col2, suffixes):
2080-
# issue: 24782
2078+
@pytest.mark.parametrize(
2079+
"col1, col2, suffixes", [("a", "a", {"a", "b"}), ("a", "a", None), (0, 0, None)],
2080+
)
2081+
def test_merge_suffix_type_error(col1, col2, suffixes):
20812082
a = pd.DataFrame({col1: [1, 2, 3]})
20822083
b = pd.DataFrame({col2: [3, 4, 5]})
20832084

2084-
# TODO: might reconsider current raise behaviour, see GH24782
2085-
msg = "iterable"
2085+
msg = f"suffixes should be of type list/tuple. But got {type(suffixes)}"
20862086
with pytest.raises(TypeError, match=msg):
20872087
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
20882088

20892089

2090+
@pytest.mark.parametrize(
2091+
"col1, col2, suffixes", [("a", "a", ("a", "b", "c"))],
2092+
)
2093+
def test_merge_suffix_length_error(col1, col2, suffixes):
2094+
a = pd.DataFrame({col1: [1, 2, 3]})
2095+
b = pd.DataFrame({col2: [3, 4, 5]})
2096+
2097+
msg = r"too many values to unpack \(expected 2\)"
2098+
with pytest.raises(ValueError, match=msg):
2099+
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
2100+
2101+
20902102
@pytest.mark.parametrize("cat_dtype", ["one", "two"])
20912103
@pytest.mark.parametrize("reverse", [True, False])
20922104
def test_merge_equal_cat_dtypes(cat_dtype, reverse):

0 commit comments

Comments
 (0)