From 28ee9f9aad75874983299605365447d5bdf687f2 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Tue, 5 Apr 2022 23:13:52 -0500 Subject: [PATCH 01/11] add tests --- pandas/tests/reshape/concat/test_index.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 50fee28669c58..96a71044d6442 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -371,3 +371,24 @@ def test_concat_with_key_not_unique(self): out_b = df_b.loc[("x", 0), :] tm.assert_frame_equal(out_a, out_b) + + def test_concat_with_duplicated_levels(self): + # keyword levels should be unique + df1 = DataFrame({"A": [1]}, index=["x"]) + df2 = DataFrame({"A": [1]}, index=["y"]) + msg = "Level values not unique: \['x', 'y', 'y'\]" + with pytest.raises(ValueError, match=msg): + concat([df1, df2], keys=["x", "y"], levels=[["x", "y", "y"]]) + + def test_concat_with_levels_with_none_keys(self): + df1 = DataFrame({"A": [1]}, index=["x"]) + df2 = DataFrame({"A": [1]}, index=["y"]) + msg = "levels supported only when keys not None" + with pytest.raises(ValueError, match=msg): + concat([df1, df2], levels=[["x", "y"]]) + + df1 = DataFrame({"A": [1]}, index=["x"]) + df2 = DataFrame({"A": [1]}, index=["y"]) + msg = "levels supported only when keys not None" + with pytest.raises(ValueError, match=msg): + concat([df1, df2], levels=[["x", "y", "y"]]) From 512314111d16f59cef698653221531bbdf97ea05 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Tue, 5 Apr 2022 23:17:08 -0500 Subject: [PATCH 02/11] Update concat.py --- pandas/core/reshape/concat.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index f2227a3e2ac83..dfaccf9516255 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -668,6 +668,8 @@ def _get_concat_axis(self) -> Index: return idx if self.keys is None: + if not self.levels is None: + raise ValueError("levels supported only when keys not None") concat_axis = _concat_indexes(indexes) else: concat_axis = _make_concat_multiindex( @@ -712,6 +714,10 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde else: levels = [ensure_index(x) for x in levels] + for level in levels: + if not level.is_unique: + raise ValueError(f"Level values not unique: {level.tolist()}") + if not all_indexes_same(indexes) or not all(level.is_unique for level in levels): codes_list = [] From 5373689bc4b6ab0897d4a4431caf1957f598c8c6 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Tue, 5 Apr 2022 23:22:34 -0500 Subject: [PATCH 03/11] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 73dc832e2007b..f371bda3b445a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -92,6 +92,9 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) +- :meth:`pd.concat` now raises when levels is given but keys is None (:issue:`46653`) +- :meth:`pd.concat` now raises when levels duplicated (:issue:`46653`) +- .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From 039d469a0fcb44537c5ee1f87db12f1a54821662 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Tue, 5 Apr 2022 23:25:45 -0500 Subject: [PATCH 04/11] Update concat.py --- pandas/core/reshape/concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index dfaccf9516255..a5c9afca7d946 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -668,7 +668,7 @@ def _get_concat_axis(self) -> Index: return idx if self.keys is None: - if not self.levels is None: + if self.levels is not None: raise ValueError("levels supported only when keys not None") concat_axis = _concat_indexes(indexes) else: @@ -716,7 +716,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde for level in levels: if not level.is_unique: - raise ValueError(f"Level values not unique: {level.tolist()}") + raise ValueError(f"Level values not unique: {level.tolist()}") if not all_indexes_same(indexes) or not all(level.is_unique for level in levels): codes_list = [] From b890d705435ab5ce716e2a1c60e5bf6436a83d25 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Tue, 5 Apr 2022 23:27:08 -0500 Subject: [PATCH 05/11] Update test_index.py --- pandas/tests/reshape/concat/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 96a71044d6442..edf2fa8e6d800 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -376,7 +376,7 @@ def test_concat_with_duplicated_levels(self): # keyword levels should be unique df1 = DataFrame({"A": [1]}, index=["x"]) df2 = DataFrame({"A": [1]}, index=["y"]) - msg = "Level values not unique: \['x', 'y', 'y'\]" + msg = r"Level values not unique: \['x', 'y', 'y'\]" with pytest.raises(ValueError, match=msg): concat([df1, df2], keys=["x", "y"], levels=[["x", "y", "y"]]) From 7b516c2e3a3821ad74b6ff7493f3a9232c8101f5 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 15:18:45 -0500 Subject: [PATCH 06/11] Update pandas/core/reshape/concat.py Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index a5c9afca7d946..054fbb85cead7 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -669,7 +669,7 @@ def _get_concat_axis(self) -> Index: if self.keys is None: if self.levels is not None: - raise ValueError("levels supported only when keys not None") + raise ValueError("levels supported only when keys is not None") concat_axis = _concat_indexes(indexes) else: concat_axis = _make_concat_multiindex( From 9b57d6437373e6e2a3ee86851ecba4cf1103cdae Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 15:18:56 -0500 Subject: [PATCH 07/11] Update doc/source/whatsnew/v1.5.0.rst Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index f371bda3b445a..c57f24a92023d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -93,7 +93,7 @@ Other enhancements - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) - :meth:`pd.concat` now raises when levels is given but keys is None (:issue:`46653`) -- :meth:`pd.concat` now raises when levels duplicated (:issue:`46653`) +- :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`) - .. --------------------------------------------------------------------------- From 598fa4b9d34fce6fbff069552c0e0abad2ecb29d Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 15:19:02 -0500 Subject: [PATCH 08/11] Update doc/source/whatsnew/v1.5.0.rst Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c57f24a92023d..de4d70473f91e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -92,7 +92,7 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) -- :meth:`pd.concat` now raises when levels is given but keys is None (:issue:`46653`) +- :meth:`pd.concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`) - :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`) - From 08508e15a5bab94a63991dfc31b1489e10967f14 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 15:26:47 -0500 Subject: [PATCH 09/11] Update test_index.py --- pandas/tests/reshape/concat/test_index.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index edf2fa8e6d800..052ad9f6ed3c2 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -379,16 +379,11 @@ def test_concat_with_duplicated_levels(self): msg = r"Level values not unique: \['x', 'y', 'y'\]" with pytest.raises(ValueError, match=msg): concat([df1, df2], keys=["x", "y"], levels=[["x", "y", "y"]]) - - def test_concat_with_levels_with_none_keys(self): - df1 = DataFrame({"A": [1]}, index=["x"]) - df2 = DataFrame({"A": [1]}, index=["y"]) - msg = "levels supported only when keys not None" - with pytest.raises(ValueError, match=msg): - concat([df1, df2], levels=[["x", "y"]]) - + + @pytest.mark.parametrize("levels", [[["x", "y"]], [["x", "y", "y"]]]) + def test_concat_with_levels_with_none_keys(self, levels): df1 = DataFrame({"A": [1]}, index=["x"]) df2 = DataFrame({"A": [1]}, index=["y"]) msg = "levels supported only when keys not None" with pytest.raises(ValueError, match=msg): - concat([df1, df2], levels=[["x", "y", "y"]]) + concat([df1, df2], levels=levels) From 067eb670869e31cc596722bde83ab3cf811a9afe Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 15:33:46 -0500 Subject: [PATCH 10/11] Update test_index.py --- pandas/tests/reshape/concat/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 052ad9f6ed3c2..c99f3eaa05083 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -379,7 +379,7 @@ def test_concat_with_duplicated_levels(self): msg = r"Level values not unique: \['x', 'y', 'y'\]" with pytest.raises(ValueError, match=msg): concat([df1, df2], keys=["x", "y"], levels=[["x", "y", "y"]]) - + @pytest.mark.parametrize("levels", [[["x", "y"]], [["x", "y", "y"]]]) def test_concat_with_levels_with_none_keys(self, levels): df1 = DataFrame({"A": [1]}, index=["x"]) From 2ec812ae2cb745965483c63ad087b1394ae55207 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 15:35:36 -0500 Subject: [PATCH 11/11] Update test_index.py --- pandas/tests/reshape/concat/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index c99f3eaa05083..b20e4bcc2256b 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -384,6 +384,6 @@ def test_concat_with_duplicated_levels(self): def test_concat_with_levels_with_none_keys(self, levels): df1 = DataFrame({"A": [1]}, index=["x"]) df2 = DataFrame({"A": [1]}, index=["y"]) - msg = "levels supported only when keys not None" + msg = "levels supported only when keys is not None" with pytest.raises(ValueError, match=msg): concat([df1, df2], levels=levels)