Skip to content

Commit e68e7b8

Browse files
committed
ENH: raise a more helpful error message when merging frames with duplicate columns. Closes #2649
1 parent ec0e61a commit e68e7b8

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

RELEASE.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ pandas 0.10.1
101101
- Fix setitem on a Series with a boolean key and a non-scalar as value (GH2686_)
102102
- Box datetime64 values in Series.apply/map (GH2627_, GH2689_)
103103
- Upconvert datetime + datetime64 values when concatenating frames (GH2624_)
104+
- Raise a more helpful error message in merge operations when one DataFrame
105+
has duplicate columns (GH2649_)
104106

105107
**API Changes**
106108

@@ -124,10 +126,11 @@ pandas 0.10.1
124126
.. _GH2624: https://github.com/pydata/pandas/issues/2624
125127
.. _GH2625: https://github.com/pydata/pandas/issues/2625
126128
.. _GH2627: https://github.com/pydata/pandas/issues/2627
127-
.. _GH2643: https://github.com/pydata/pandas/issues/2643
128129
.. _GH2631: https://github.com/pydata/pandas/issues/2631
129130
.. _GH2633: https://github.com/pydata/pandas/issues/2633
130131
.. _GH2637: https://github.com/pydata/pandas/issues/2637
132+
.. _GH2643: https://github.com/pydata/pandas/issues/2643
133+
.. _GH2649: https://github.com/pydata/pandas/issues/2649
131134
.. _GH2668: https://github.com/pydata/pandas/issues/2668
132135
.. _GH2689: https://github.com/pydata/pandas/issues/2689
133136
.. _GH2690: https://github.com/pydata/pandas/issues/2690

pandas/tools/merge.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,14 @@ def _validate_specification(self):
378378
if self.left_on is None:
379379
raise MergeError('Must pass left_on or left_index=True')
380380
else:
381+
if not self.left.columns.is_unique:
382+
raise MergeError("Left data columns not unique: %s"
383+
% repr(self.left.columns))
384+
385+
if not self.right.columns.is_unique:
386+
raise MergeError("Right data columns not unique: %s"
387+
% repr(self.right.columns))
388+
381389
# use the common columns
382390
common_cols = self.left.columns.intersection(
383391
self.right.columns)

pandas/tools/tests/test_merge.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,19 @@ def test_merge_nosort(self):
713713

714714
self.assert_((df.var3.unique() == result.var3.unique()).all())
715715

716+
def test_overlapping_columns_error_message(self):
717+
# #2649
718+
df = DataFrame({'key': [1, 2, 3],
719+
'v1': [4, 5, 6],
720+
'v2': [7, 8, 9]})
721+
df2 = DataFrame({'key': [1, 2, 3],
722+
'v1': [4, 5, 6],
723+
'v2': [7, 8, 9]})
724+
725+
df.columns = ['key', 'foo', 'foo']
726+
df2.columns = ['key', 'bar', 'bar']
727+
728+
self.assertRaises(Exception, merge, df, df2)
716729

717730
def _check_merge(x, y):
718731
for how in ['inner', 'left', 'outer']:

0 commit comments

Comments
 (0)