diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 73dc832e2007b..de4d70473f91e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -92,6 +92,9 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) +- :func:`concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`) +- :func:`concat` now raises when ``levels`` contains duplicate values (:issue:`46653`) +- .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index f2227a3e2ac83..054fbb85cead7 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -668,6 +668,8 @@ def _get_concat_axis(self) -> Index: return idx if self.keys is None: + if self.levels is not None: + raise ValueError("levels supported only when keys is not None") concat_axis = _concat_indexes(indexes) else: concat_axis = _make_concat_multiindex( @@ -712,6 +714,10 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde else: levels = [ensure_index(x) for x in levels] + for level in levels: + if not level.is_unique: + raise ValueError(f"Level values not unique: {level.tolist()}") + if not all_indexes_same(indexes) or not all(level.is_unique for level in levels): codes_list = [] diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 50fee28669c58..b20e4bcc2256b 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -371,3 +371,19 @@ def test_concat_with_key_not_unique(self): out_b = df_b.loc[("x", 0), :] 
tm.assert_frame_equal(out_a, out_b) + + def test_concat_with_duplicated_levels(self): + # keyword levels should be unique + df1 = DataFrame({"A": [1]}, index=["x"]) + df2 = DataFrame({"A": [1]}, index=["y"]) + msg = r"Level values not unique: \['x', 'y', 'y'\]" + with pytest.raises(ValueError, match=msg): + concat([df1, df2], keys=["x", "y"], levels=[["x", "y", "y"]]) + + @pytest.mark.parametrize("levels", [[["x", "y"]], [["x", "y", "y"]]]) + def test_concat_with_levels_with_none_keys(self, levels): + df1 = DataFrame({"A": [1]}, index=["x"]) + df2 = DataFrame({"A": [1]}, index=["y"]) + msg = "levels supported only when keys is not None" + with pytest.raises(ValueError, match=msg): + concat([df1, df2], levels=levels)