pandas-dev · jreback · Nov 29, 2020 · Oct 6, 2020 · Oct 6, 2020 · Oct 6, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -405,7 +405,8 @@ MultiIndex
 
 - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`)
 - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
--
+- Bug in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`)
+
 
 I/O
 ^^^
@@ -485,6 +486,7 @@ Other
 - Fixed metadata propagation in the :class:`Series.dt` and :class:`Series.str` accessors (:issue:`28283`)
 - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`)
 - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`)
+- Bug in :meth:`Index.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`31326`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2727,7 +2727,7 @@ def intersection(self, other, sort=False):
         self._assert_can_do_setop(other)
         other = ensure_index(other)
 
-        if self.equals(other):
+        if self.equals(other) and not self.has_duplicates:
             return self._get_reconciled_name_object(other)
 
         if not is_dtype_equal(self.dtype, other.dtype):
@@ -2745,6 +2745,7 @@ def intersection(self, other, sort=False):
             except TypeError:
                 pass
             else:
+                result = algos.unique1d(result)
                 return self._wrap_setop_result(other, result)
 
         try:
@@ -2756,7 +2757,7 @@ def intersection(self, other, sort=False):
             indexer = algos.unique1d(Index(rvals).get_indexer_non_unique(lvals)[0])
             indexer = indexer[indexer != -1]
 
-        result = other.take(indexer)
+        result = other.take(indexer).unique()
 
         if sort is None:
             result = algos.safe_sort(result.values)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -3475,6 +3475,8 @@ def intersection(self, other, sort=False):
         other, result_names = self._convert_can_do_setop(other)
 
         if self.equals(other):
+            if self.has_duplicates:
+                return self.unique()
             return self.rename(result_names)
 
         if not is_object_dtype(other.dtype):
@@ -3493,10 +3495,12 @@ def intersection(self, other, sort=False):
         uniq_tuples = None  # flag whether _inner_indexer was successful
         if self.is_monotonic and other.is_monotonic:
             try:
-                uniq_tuples = self._inner_indexer(lvals, rvals)[0]
-                sort = False  # uniq_tuples is already sorted
+                inner_tuples = self._inner_indexer(lvals, rvals)[0]
+                sort = False  # inner_tuples is already sorted
             except TypeError:
                 pass
+            else:
+                uniq_tuples = algos.unique(inner_tuples)
 
         if uniq_tuples is None:
             other_uniq = set(rvals)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
@@ -457,7 +457,9 @@ def _should_reindex_frame_op(
         # TODO: any other cases we should handle here?
         cols = left.columns.intersection(right.columns)
 
-        if len(cols) and not (cols.equals(left.columns) and cols.equals(right.columns)):
+        if len(cols) and not (
+            cols.equals(left.columns.unique()) and cols.equals(right.columns.unique())
+        ):
 def test_column_dups_operations(self): 
 def test_column_dups_operations(self): 
             # TODO: is there a shortcut available when len(cols) == 0?
             return True
 

diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
@@ -376,3 +376,26 @@ def test_setops_disallow_true(method):
 
     with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
         getattr(idx1, method)(idx2, sort=True)
+
+
+@pytest.mark.parametrize(
+    ("tuples", "exp_tuples"),
+    [
+        ([("val1", "test1")], [("val1", "test1")]),
+        ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]),
+        (
+            [("val2", "test2"), ("val1", "test1")],
+            [("val2", "test2"), ("val1", "test1")],
+        ),
+    ],
+)
+def test_intersect_with_duplicates(tuples, exp_tuples):
+    # GH: 36915 - shared tuples must appear once, even if duplicated in an operand
+    left = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
+    right = pd.MultiIndex.from_tuples(
+        [("val1", "test1"), ("val1", "test1"), ("val2", "test2")],
+        names=["first", "second"],
+    )
+    result = left.intersection(right)
+    expected = pd.MultiIndex.from_tuples(exp_tuples, names=["first", "second"])
+    tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -683,7 +683,7 @@ def test_intersection_monotonic(self, index2, keeps_name, sort):
 
     @pytest.mark.parametrize(
         "index2,expected_arr",
-        [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])],
+        [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
     )
     def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
         # non-monotonic non-unique

diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -95,3 +95,13 @@ def test_union_dtypes(left, right, expected):
     b = pd.Index([], dtype=right)
     result = (a | b).dtype
     assert result == expected
+
+
+@pytest.mark.parametrize("values", [[1, 2, 2, 3], [3, 3]])
+def test_intersection_duplicates(values):
+    # GH: 31326 - the shared value 3 must appear once although `b` repeats it
+    a = pd.Index(values)
+    b = pd.Index([3, 3])
+    result = a.intersection(b)
+    expected = pd.Index([3])
+    tm.assert_index_equal(result, expected)