diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index f18b3b75ca3d2..25906d52d48ff 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -486,6 +486,7 @@ Reshaping - Bug in :meth:`DataFrame.append` failing to retain dtypes when appended columns do not match (:issue:`43392`) - Bug in :func:`concat` of ``bool`` and ``boolean`` dtypes resulting in ``object`` dtype instead of ``boolean`` dtype (:issue:`42800`) - Bug in :func:`crosstab` when inputs are are categorical Series, there are categories that are not present in one or both of the Series, and ``margins=True``. Previously the margin value for missing categories was ``NaN``. It is now correctly reported as 0 (:issue:`43505`) +- Bug in :func:`concat` which would fail when all objects in the ``objs`` argument had the same index and the ``keys`` argument contained duplicates (:issue:`43595`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c019f0b413c96..8475877f9b905 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -695,7 +695,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde else: levels = [ensure_index(x) for x in levels] - if not all_indexes_same(indexes): + if not all_indexes_same(indexes) or not all(level.is_unique for level in levels): codes_list = [] # things are potentially different sizes, so compute the exact codes diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 7c78ff5a71de3..676571e419a1a 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -3,11 +3,16 @@ deque, ) from decimal import Decimal -from warnings import catch_warnings +from warnings import ( + catch_warnings, + simplefilter, +) import numpy as np import pytest +from pandas.errors import PerformanceWarning + import pandas as pd from pandas import ( DataFrame, @@ -560,6 +565,22 @@ 
def test_duplicate_keys(keys): tm.assert_frame_equal(result, expected) +def test_duplicate_keys_same_frame(): + # GH 43595 + keys = ["e", "e"] + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = concat([df, df], axis=1, keys=keys) + expected_values = [[1, 4, 1, 4], [2, 5, 2, 5], [3, 6, 3, 6]] + expected_columns = MultiIndex.from_tuples( + [(keys[0], "a"), (keys[0], "b"), (keys[1], "a"), (keys[1], "b")] + ) + expected = DataFrame(expected_values, columns=expected_columns) + with catch_warnings(): + # result.columns not sorted, resulting in performance warning + simplefilter("ignore", PerformanceWarning) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "obj", [