Skip to content

Commit 1c1bb85

Browse files
authored
BUG: DataFrame.join reordering index levels when joining on subset of levels (#55370)
* BUG: DataFrame.join reordering index levels when joining on subset of levels
* update more tests
* move whatsnew to notable section
1 parent 4976b69 commit 1c1bb85

File tree

6 files changed

+69
-23
lines changed

6 files changed

+69
-23
lines changed

doc/source/whatsnew/v2.2.0.rst

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,36 @@ and ``sort=False``:
133133
134134
result
135135
136-
.. _whatsnew_220.notable_bug_fixes.notable_bug_fix2:
136+
.. _whatsnew_220.notable_bug_fixes.multiindex_join_different_levels:
137137

138-
notable_bug_fix2
139-
^^^^^^^^^^^^^^^^
138+
:func:`merge` and :meth:`DataFrame.join` no longer reorder levels when levels differ
139+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
140+
141+
In previous versions of pandas, :func:`merge` and :meth:`DataFrame.join` would reorder
142+
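index levels when joining two indexes that share only some of their levels (:issue:`34133`).
The result now keeps the levels of the left index in their original order, followed by the levels found only in the right index (for ``how="right"``, the right index's levels come first).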
143+
144+
.. ipython:: python
145+
146+
left = pd.DataFrame({"left": 1}, index=pd.MultiIndex.from_tuples([("x", 1), ("x", 2)], names=["A", "B"]))
147+
right = pd.DataFrame({"right": 2}, index=pd.MultiIndex.from_tuples([(1, 1), (2, 2)], names=["B", "C"]))
148+
result = left.join(right)
149+
150+
*Old Behavior*
151+
152+
.. code-block:: ipython
153+
154+
In [5]: result
155+
Out[5]:
156+
left right
157+
B A C
158+
1 x 1 1 2
159+
2 x 2 1 2
160+
161+
*New Behavior*
162+
163+
.. ipython:: python
164+
165+
result
140166
141167
.. ---------------------------------------------------------------------------
142168
.. _whatsnew_220.api_breaking:
@@ -342,6 +368,7 @@ Reshaping
342368
^^^^^^^^^
343369
- Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`)
344370
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
371+
-
345372

346373
Sparse
347374
^^^^^^

pandas/core/indexes/base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4725,6 +4725,13 @@ def _join_multi(self, other: Index, how: JoinHow):
47254725

47264726
multi_join_idx = multi_join_idx.remove_unused_levels()
47274727

4728+
# maintain the order of the index levels
4729+
if how == "right":
4730+
level_order = other_names_list + ldrop_names
4731+
else:
4732+
level_order = self_names_list + rdrop_names
4733+
multi_join_idx = multi_join_idx.reorder_levels(level_order)
4734+
47284735
return multi_join_idx, lidx, ridx
47294736

47304737
jl = next(iter(overlap))

pandas/tests/reshape/merge/test_join.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -902,7 +902,7 @@ def test_join_inner_multiindex_deterministic_order():
902902
result = left.join(right, how="inner")
903903
expected = DataFrame(
904904
{"e": [5], "f": [6]},
905-
index=MultiIndex.from_tuples([(2, 1, 4, 3)], names=("b", "a", "d", "c")),
905+
index=MultiIndex.from_tuples([(1, 2, 4, 3)], names=("a", "b", "d", "c")),
906906
)
907907
tm.assert_frame_equal(result, expected)
908908

@@ -926,10 +926,16 @@ def test_join_multiindex_one_level(join_type):
926926
)
927927
right = DataFrame(data={"d": 4}, index=MultiIndex.from_tuples([(2,)], names=("b",)))
928928
result = left.join(right, how=join_type)
929-
expected = DataFrame(
930-
{"c": [3], "d": [4]},
931-
index=MultiIndex.from_tuples([(2, 1)], names=["b", "a"]),
932-
)
929+
if join_type == "right":
930+
expected = DataFrame(
931+
{"c": [3], "d": [4]},
932+
index=MultiIndex.from_tuples([(2, 1)], names=["b", "a"]),
933+
)
934+
else:
935+
expected = DataFrame(
936+
{"c": [3], "d": [4]},
937+
index=MultiIndex.from_tuples([(1, 2)], names=["a", "b"]),
938+
)
933939
tm.assert_frame_equal(result, expected)
934940

935941

pandas/tests/reshape/merge/test_multi.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,6 @@ def on_cols_multi():
6969
return ["Origin", "Destination", "Period"]
7070

7171

72-
@pytest.fixture
73-
def idx_cols_multi():
74-
return ["Origin", "Destination", "Period", "TripPurp", "LinkType"]
75-
76-
7772
class TestMergeMulti:
7873
def test_merge_on_multikey(self, left, right, join_type):
7974
on_cols = ["key1", "key2"]
@@ -815,9 +810,13 @@ def test_join_multi_levels2(self):
815810

816811

817812
class TestJoinMultiMulti:
818-
def test_join_multi_multi(
819-
self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
820-
):
813+
def test_join_multi_multi(self, left_multi, right_multi, join_type, on_cols_multi):
814+
left_names = left_multi.index.names
815+
right_names = right_multi.index.names
816+
if join_type == "right":
817+
level_order = right_names + left_names.difference(right_names)
818+
else:
819+
level_order = left_names + right_names.difference(left_names)
821820
# Multi-index join tests
822821
expected = (
823822
merge(
@@ -826,27 +825,34 @@ def test_join_multi_multi(
826825
how=join_type,
827826
on=on_cols_multi,
828827
)
829-
.set_index(idx_cols_multi)
828+
.set_index(level_order)
830829
.sort_index()
831830
)
832831

833832
result = left_multi.join(right_multi, how=join_type).sort_index()
834833
tm.assert_frame_equal(result, expected)
835834

836835
def test_join_multi_empty_frames(
837-
self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
836+
self, left_multi, right_multi, join_type, on_cols_multi
838837
):
839838
left_multi = left_multi.drop(columns=left_multi.columns)
840839
right_multi = right_multi.drop(columns=right_multi.columns)
841840

841+
left_names = left_multi.index.names
842+
right_names = right_multi.index.names
843+
if join_type == "right":
844+
level_order = right_names + left_names.difference(right_names)
845+
else:
846+
level_order = left_names + right_names.difference(left_names)
847+
842848
expected = (
843849
merge(
844850
left_multi.reset_index(),
845851
right_multi.reset_index(),
846852
how=join_type,
847853
on=on_cols_multi,
848854
)
849-
.set_index(idx_cols_multi)
855+
.set_index(level_order)
850856
.sort_index()
851857
)
852858

pandas/tests/series/methods/test_align.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,10 +240,10 @@ def test_align_left_different_named_levels():
240240
result_left, result_right = left.align(right)
241241

242242
expected_left = Series(
243-
[2], index=pd.MultiIndex.from_tuples([(1, 3, 4, 2)], names=["a", "c", "d", "b"])
243+
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
244244
)
245245
expected_right = Series(
246-
[1], index=pd.MultiIndex.from_tuples([(1, 3, 4, 2)], names=["a", "c", "d", "b"])
246+
[1], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
247247
)
248248
tm.assert_series_equal(result_left, expected_left)
249249
tm.assert_series_equal(result_right, expected_right)

pandas/tests/series/test_arithmetic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -941,8 +941,8 @@ def test_series_varied_multiindex_alignment():
941941
expected = Series(
942942
[1000, 2001, 3002, 4003],
943943
index=pd.MultiIndex.from_tuples(
944-
[("x", 1, "a"), ("x", 2, "a"), ("y", 1, "a"), ("y", 2, "a")],
945-
names=["xy", "num", "ab"],
944+
[("a", "x", 1), ("a", "x", 2), ("a", "y", 1), ("a", "y", 2)],
945+
names=["ab", "xy", "num"],
946946
),
947947
)
948948
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)