
BUG: pd.concat with identical key leads to multi-indexing error #46546

Merged · 16 commits · Apr 5, 2022
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
@@ -536,6 +536,7 @@ Reshaping
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
- Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
- Bug in concatenation with ``IntegerDtype`` or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
- Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`)
-

Sparse
2 changes: 1 addition & 1 deletion pandas/core/reshape/concat.py
@@ -705,7 +705,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
names = [None]

if levels is None:
-        levels = [ensure_index(keys)]
+        levels = [ensure_index(keys).unique()]
Contributor (@jreback):
Hmm, shouldn't this be the case for a specified levels as well?

Contributor Author:
Can we check whether the level is unique before? If not, raise ValueError. The doc says it should be unique.

Contributor Author (@GYHHAHA, Mar 29, 2022):
I find we actually do not have a check for duplicated levels in the concat function. Something like the following will not raise. Since this problem is an isolated one, I will make a separate PR to avoid confusion.

df1 = pd.DataFrame({"A": [1]}, index=["x"])
df2 = pd.DataFrame({"A": [1]}, index=["y"])
pd.concat([df1, df2], levels=[["x", "y", "y"]]) # should raise
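A minimal sketch of the proposed check (hypothetical — `validate_levels` is not part of pandas; it only illustrates raising ValueError for a duplicated user-supplied level, as the docstring requires uniqueness):

```python
import pandas as pd

def validate_levels(levels):
    # Hypothetical helper: reject any user-supplied level containing
    # duplicates, since the concat docs say level values must be unique.
    for level in levels:
        idx = pd.Index(level)
        if not idx.is_unique:
            raise ValueError(f"Level values must be unique: {list(idx)}")

validate_levels([["x", "y"]])  # unique -> passes silently
try:
    validate_levels([["x", "y", "y"]])  # duplicated -> raises
except ValueError as exc:
    print(exc)
```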

Member (@rhshadrach, Mar 31, 2022):
@GYHHAHA - Agreed what you are pointing out is a separate issue, but here is an example that @jreback was referring to.

df1 = pd.DataFrame({"x": [1, 2], "y": [3, 4], "z": [5, 6]}).set_index(["x", "y"])
df2 = pd.DataFrame({"x": [7, 8], "y": [9, 10], "z": [11, 12]}).set_index(["x", "y"])
result = pd.concat([df1, df2, df1], keys=["x", "y", "x"], levels=[["x", "y", "x"]])
print(result.loc['x', 1, 3])

This also raises the same error that is being addressed here.
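For contrast, the same selection succeeds once the levels argument is deduplicated, as the thread notes further down (a sketch; the warnings filter only silences the expected lexsort PerformanceWarning on the unsorted MultiIndex):

```python
import warnings
import pandas as pd

df1 = pd.DataFrame({"x": [1, 2], "y": [3, 4], "z": [5, 6]}).set_index(["x", "y"])
df2 = pd.DataFrame({"x": [7, 8], "y": [9, 10], "z": [11, 12]}).set_index(["x", "y"])

# Unique levels: the concat + selection that raised above now works.
result = pd.concat([df1, df2, df1], keys=["x", "y", "x"], levels=[["x", "y"]])
with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # unsorted MultiIndex warns on lookup
    sel = result.loc[("x", 1, 3), :]

# df1 appears twice under key "x", so the full-key lookup matches two rows.
print(sel["z"].tolist())
```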

Contributor Author (@GYHHAHA, Mar 31, 2022):
Why do you use ``.loc`` with the strings '1' and '3' instead of numeric values? @rhshadrach

Contributor Author:
Yeah, now I get the error. I will look into this.

Member:
Sounds good! I believe you just need to apply your change to the else clause highlighted here (but I could be wrong).

Contributor Author:
But I believe this is caused by a duplicated levels input; if levels is [["x", "y"]], it works fine. It may be more suitable to handle this in another PR related to the unique levels keyword. @rhshadrach

Member:
Ah - I see your point; the user should not ever specify a level with duplicate values and so we can raise here instead. That makes sense to separate this off into a different PR; can you see if there is an issue for this already and open one if there isn't?

Contributor Author (@GYHHAHA, Mar 31, 2022):
It seems that such an issue doesn't exist yet. I'll open one and link a PR to it after refining the performance warning check in the current PR. Also, since we will raise for a duplicated level, the unique() in the else clause is unnecessary.

else:
levels = [ensure_index(x) for x in levels]
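The effect of the one-line change above can be seen on the reproducer from GH#46519 (a sketch; the output assumes a pandas build that includes this patch):

```python
import pandas as pd

df1 = pd.DataFrame({"name": [1]})
df2 = pd.DataFrame({"name": [2]})
df3 = pd.DataFrame({"name": [3]})

# Duplicate key "x": the level derived from keys must be deduplicated,
# otherwise indexing the resulting MultiIndex raised before this patch.
out = pd.concat([df1, df2, df3], keys=["x", "y", "x"])

# The level holds unique values; the codes still record the repetition.
print(list(out.index.levels[0]))
```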

48 changes: 48 additions & 0 deletions pandas/tests/reshape/concat/test_index.py
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import (
DataFrame,
@@ -323,3 +325,49 @@ def test_concat_multiindex_(self):
{"col": ["a", "b", "c"]}, index=MultiIndex.from_product(iterables)
)
tm.assert_frame_equal(result_df, expected_df)

def test_concat_with_key_not_unique(self):
# GH 46519
df1 = DataFrame({"name": [1]})
df2 = DataFrame({"name": [2]})
df3 = DataFrame({"name": [3]})
df_a = concat([df1, df2, df3], keys=["x", "y", "x"])
# the warning is caused by indexing unsorted multi-index
with tm.assert_produces_warning(
PerformanceWarning, match="indexing past lexsort depth"
):
out_a = df_a.loc[("x", 0), :]

df_b = DataFrame(
{"name": [1, 2, 3]}, index=Index([("x", 0), ("y", 0), ("x", 0)])
)
with tm.assert_produces_warning(
PerformanceWarning, match="indexing past lexsort depth"
):
out_b = df_b.loc[("x", 0)]

tm.assert_frame_equal(out_a, out_b)

df1 = DataFrame({"name": ["a", "a", "b"]})
df2 = DataFrame({"name": ["a", "b"]})
df3 = DataFrame({"name": ["c", "d"]})
df_a = concat([df1, df2, df3], keys=["x", "y", "x"])
with tm.assert_produces_warning(
PerformanceWarning, match="indexing past lexsort depth"
):
out_a = df_a.loc[("x", 0), :]

df_b = DataFrame(
{
"a": ["x", "x", "x", "y", "y", "x", "x"],
"b": [0, 1, 2, 0, 1, 0, 1],
"name": list("aababcd"),
}
).set_index(["a", "b"])
df_b.index.names = [None, None]
with tm.assert_produces_warning(
PerformanceWarning, match="indexing past lexsort depth"
):
out_b = df_b.loc[("x", 0), :]

tm.assert_frame_equal(out_a, out_b)
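The equivalence the new test asserts can be checked interactively (a sketch mirroring the first test case; the warnings filter silences the expected lexsort PerformanceWarning):

```python
import warnings
import pandas as pd

df1 = pd.DataFrame({"name": [1]})
df2 = pd.DataFrame({"name": [2]})
df3 = pd.DataFrame({"name": [3]})
df_a = pd.concat([df1, df2, df3], keys=["x", "y", "x"])

# The same frame built directly from tuples (a pd.Index of tuples
# becomes a MultiIndex), so both selections should match.
df_b = pd.DataFrame({"name": [1, 2, 3]},
                    index=pd.Index([("x", 0), ("y", 0), ("x", 0)]))

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # unsorted MultiIndex warns on lookup
    out_a = df_a.loc[("x", 0), :]
    out_b = df_b.loc[("x", 0)]

pd.testing.assert_frame_equal(out_a, out_b)
```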