From 40bd638792e135b046b9b7ba662d72d2059ea0da Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 31 Jan 2023 17:21:49 +0100 Subject: [PATCH 1/4] TEST: join df with categorical multiIndex --- pandas/tests/reshape/merge/test_join.py | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index e5927aa094193..97639a465e2ff 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.core.dtypes.common import is_categorical_dtype + import pandas as pd from pandas import ( Categorical, @@ -956,3 +958,27 @@ def test_join_empty(left_empty, how, exp): expected = expected.rename_axis("A") tm.assert_frame_equal(result, expected) + + +def test_join_multiindex_categorical_output_index_dtype(): + # GH#50906 + df1 = DataFrame( + { + "idx1": Categorical(["a", "a", "a"]), + "idx2": Categorical(["a", "a", "b"]), + "data": [1, 2, 3], + } + ).set_index(["idx1", "idx2"]) + + df2 = DataFrame( + { + "idx1": Categorical(["a", "a", "a"]), + "idx2": Categorical(["a", "b", "b"]), + "data2": [1, 2, 3], + } + ).set_index(["idx1", "idx2"]) + + for how in ["inner", "outer", "left", "right"]: + df = df1.join(df2, how=how) + assert is_categorical_dtype(df.index.levels[0]) is True + assert is_categorical_dtype(df.index.levels[1]) is True From ea7fe922a841b60e9e706f2c4467437c4fb0e180 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 1 Feb 2023 15:48:22 +0100 Subject: [PATCH 2/4] compare result and expected DataFrame --- pandas/tests/reshape/merge/test_join.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 97639a465e2ff..a79d51e925758 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -1,8 +1,6 @@ import numpy as 
np import pytest -from pandas.core.dtypes.common import is_categorical_dtype - import pandas as pd from pandas import ( Categorical, @@ -978,7 +976,15 @@ def test_join_multiindex_categorical_output_index_dtype(): } ).set_index(["idx1", "idx2"]) + expected = DataFrame( + { + "idx1": Categorical(["a", "a", "a", "a"]), + "idx2": Categorical(["a", "a", "b", "b"]), + "data": [1, 2, 3, 3], + "data2": [1, 1, 2, 3], + } + ).set_index(["idx1", "idx2"]) + for how in ["inner", "outer", "left", "right"]: - df = df1.join(df2, how=how) - assert is_categorical_dtype(df.index.levels[0]) is True - assert is_categorical_dtype(df.index.levels[1]) is True + result = df1.join(df2, how=how) + tm.assert_frame_equal(result, expected) From a2988bcf57357e98e8ea147b458c44a99845de3a Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 1 Feb 2023 23:51:09 +0100 Subject: [PATCH 3/4] add pytest.mark.parametrize to the test --- pandas/tests/reshape/merge/test_join.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index a79d51e925758..f85f58e6f8e85 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -958,7 +958,8 @@ def test_join_empty(left_empty, how, exp): tm.assert_frame_equal(result, expected) -def test_join_multiindex_categorical_output_index_dtype(): +@pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) +def test_join_multiindex_categorical_output_index_dtype(how): # GH#50906 df1 = DataFrame( { @@ -985,6 +986,5 @@ def test_join_multiindex_categorical_output_index_dtype(): } ).set_index(["idx1", "idx2"]) - for how in ["inner", "outer", "left", "right"]: - result = df1.join(df2, how=how) - tm.assert_frame_equal(result, expected) + result = df1.join(df2, how=how) + tm.assert_frame_equal(result, expected) From e0b78d87276db287bb1246c4c74b87b57aeea05a Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 Feb 2023
17:28:04 +0100 Subject: [PATCH 4/4] use dataframes from the original example --- pandas/tests/reshape/merge/test_join.py | 38 +++++++++++++++---------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index f85f58e6f8e85..7008e1594712f 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -958,33 +958,41 @@ def test_join_empty(left_empty, how, exp): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) -def test_join_multiindex_categorical_output_index_dtype(how): +@pytest.mark.parametrize( + "how, values", + [ + ("inner", [0, 1, 2]), + ("outer", [0, 1, 2]), + ("left", [0, 1, 2]), + ("right", [0, 2, 1]), + ], +) +def test_join_multiindex_categorical_output_index_dtype(how, values): # GH#50906 df1 = DataFrame( { - "idx1": Categorical(["a", "a", "a"]), - "idx2": Categorical(["a", "a", "b"]), - "data": [1, 2, 3], + "a": Categorical([0, 1, 2]), + "b": Categorical([0, 1, 2]), + "c": [0, 1, 2], } - ).set_index(["idx1", "idx2"]) + ).set_index(["a", "b"]) df2 = DataFrame( { - "idx1": Categorical(["a", "a", "a"]), - "idx2": Categorical(["a", "b", "b"]), - "data2": [1, 2, 3], + "a": Categorical([0, 2, 1]), + "b": Categorical([0, 2, 1]), + "d": [0, 2, 1], } - ).set_index(["idx1", "idx2"]) + ).set_index(["a", "b"]) expected = DataFrame( { - "idx1": Categorical(["a", "a", "a", "a"]), - "idx2": Categorical(["a", "a", "b", "b"]), - "data": [1, 2, 3, 3], - "data2": [1, 1, 2, 3], + "a": Categorical(values), + "b": Categorical(values), + "c": values, + "d": values, } - ).set_index(["idx1", "idx2"]) + ).set_index(["a", "b"]) result = df1.join(df2, how=how) tm.assert_frame_equal(result, expected)