add multi index with categories test

Hanspagh · Hanspagh · commit 6b0011e7eadc · 2022-11-30T18:01:57.000+01:00
diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
@@ -14,9 +14,7 @@
 
 
 @pytest.mark.parametrize("case", [0.5, "xxx"])
-@pytest.mark.parametrize(
-    "method", ["intersection", "union", "difference", "symmetric_difference"]
-)
+@pytest.mark.parametrize("method", ["intersection", "union", "difference", "symmetric_difference"])
 def test_set_ops_error_cases(idx, case, sort, method):
     # non-iterable input
     msg = "Input must be Index or array-like"
@@ -299,9 +297,7 @@ def test_intersection(idx, sort):
     assert result.equals(idx)
 
 
-@pytest.mark.parametrize(
-    "method", ["intersection", "union", "difference", "symmetric_difference"]
-)
+@pytest.mark.parametrize("method", ["intersection", "union", "difference", "symmetric_difference"])
 def test_setop_with_categorical(idx, sort, method):
     other = idx.to_flat_index().astype("category")
     res_names = [None] * idx.nlevels
@@ -428,9 +424,7 @@ def test_union_multiindex_empty_rangeindex():
     tm.assert_index_equal(mi, result_right, check_names=False)
 
 
-@pytest.mark.parametrize(
-    "method", ["union", "intersection", "difference", "symmetric_difference"]
-)
+@pytest.mark.parametrize("method", ["union", "intersection", "difference", "symmetric_difference"])
 def test_setops_disallow_true(method):
     idx1 = MultiIndex.from_product([["a", "b"], [1, 2]])
     idx2 = MultiIndex.from_product([["b", "c"], [1, 2]])
@@ -442,12 +436,8 @@ def test_setops_disallow_true(method):
 @pytest.mark.parametrize("val", [pd.NA, 100])
 def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val):
     # GH#48606
-    midx = MultiIndex.from_arrays(
-        [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
-    )
-    midx2 = MultiIndex.from_arrays(
-        [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
-    )
+    midx = MultiIndex.from_arrays([Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None])
+    midx2 = MultiIndex.from_arrays([Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]])
     result = midx.difference(midx2)
     expected = MultiIndex.from_arrays([Series([1], dtype=any_numeric_ea_dtype), [2]])
     tm.assert_index_equal(result, expected)
@@ -463,16 +453,10 @@ def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val):
 @pytest.mark.parametrize("val", [pd.NA, 5])
 def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val):
     # GH#48607
-    midx = MultiIndex.from_arrays(
-        [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
-    )
-    midx2 = MultiIndex.from_arrays(
-        [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
-    )
+    midx = MultiIndex.from_arrays([Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None])
+    midx2 = MultiIndex.from_arrays([Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]])
     result = midx.symmetric_difference(midx2)
-    expected = MultiIndex.from_arrays(
-        [Series([1, 1, val], dtype=any_numeric_ea_dtype), [1, 2, 3]]
-    )
+    expected = MultiIndex.from_arrays([Series([1, 1, val], dtype=any_numeric_ea_dtype), [1, 2, 3]])
     tm.assert_index_equal(result, expected)
 
 
@@ -566,9 +550,7 @@ def test_union_nan_got_duplicated(dtype, sort):
     mi2 = MultiIndex.from_arrays([pd.array([1.0, np.nan, 3.0], dtype=dtype), [2, 3, 4]])
     result = mi1.union(mi2, sort=sort)
     if sort is None:
-        expected = MultiIndex.from_arrays(
-            [pd.array([1.0, 3.0, np.nan], dtype=dtype), [2, 4, 3]]
-        )
+        expected = MultiIndex.from_arrays([pd.array([1.0, 3.0, np.nan], dtype=dtype), [2, 4, 3]])
     else:
         expected = mi2
     tm.assert_index_equal(result, expected)
@@ -584,13 +566,9 @@ def test_union_keep_ea_dtype(any_numeric_ea_dtype, val):
     midx2 = MultiIndex.from_arrays([arr2, [2, 1]])
     result = midx.union(midx2)
     if val == 4:
-        expected = MultiIndex.from_arrays(
-            [Series([1, 2, 4], dtype=any_numeric_ea_dtype), [1, 2, 1]]
-        )
+        expected = MultiIndex.from_arrays([Series([1, 2, 4], dtype=any_numeric_ea_dtype), [1, 2, 1]])
     else:
-        expected = MultiIndex.from_arrays(
-            [Series([1, 2], dtype=any_numeric_ea_dtype), [1, 2]]
-        )
+        expected = MultiIndex.from_arrays([Series([1, 2], dtype=any_numeric_ea_dtype), [1, 2]])
     tm.assert_index_equal(result, expected)
 
 
@@ -637,9 +615,7 @@ def test_union_duplicates(index, request):
         # and loses type information. Result is then unsigned only when values are
         # sufficiently large to require unsigned dtype. This happens only if other
         # has dups or one of both have missing values
-        expected = expected.set_levels(
-            [expected.levels[0].astype(int), expected.levels[1]]
-        )
+        expected = expected.set_levels([expected.levels[0].astype(int), expected.levels[1]])
     result = mi1.union(mi2)
     tm.assert_index_equal(result, expected)
 
@@ -666,9 +642,7 @@ def test_union_keep_ea_dtype_with_na(any_numeric_ea_dtype):
     midx = MultiIndex.from_arrays([arr1, [2, 1]], names=["a", None])
     midx2 = MultiIndex.from_arrays([arr2, [1, 2]])
     result = midx.union(midx2)
-    expected = MultiIndex.from_arrays(
-        [Series([1, 4, pd.NA, pd.NA], dtype=any_numeric_ea_dtype), [1, 2, 1, 2]]
-    )
+    expected = MultiIndex.from_arrays([Series([1, 4, pd.NA, pd.NA], dtype=any_numeric_ea_dtype), [1, 2, 1, 2]])
     tm.assert_index_equal(result, expected)
 
 
@@ -692,15 +666,41 @@ def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names):
     assert mi_int._lexsort_depth == 2
 
 
+@pytest.mark.parametrize("a", [pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]])
+@pytest.mark.parametrize(
+    "b",
+    [
+        pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True),
+        pd.Categorical(["a", "b"], categories=["b", "a"]),
+    ],
+)
+def test_intersection_with_non_lex_sorted_categories(a, b):
+    # GH#49974: intersection must give the same result whether or not either
+    # operand is sorted, even when category order differs from lexical order.
+    other = ["1", "2"]
+
+    df1 = pd.DataFrame({"x": a, "y": other})
+    df2 = pd.DataFrame({"x": b, "y": other})
+    expected = MultiIndex.from_arrays([a, other], names=["x", "y"])
+
+    res1 = MultiIndex.from_frame(df1).intersection(MultiIndex.from_frame(df2.sort_values(["x", "y"])))
+    res2 = MultiIndex.from_frame(df1).intersection(MultiIndex.from_frame(df2))
+    res3 = MultiIndex.from_frame(df1.sort_values(["x", "y"])).intersection(MultiIndex.from_frame(df2))
+    res4 = MultiIndex.from_frame(df1.sort_values(["x", "y"])).intersection(
+        MultiIndex.from_frame(df2.sort_values(["x", "y"]))
+    )
+
+    tm.assert_index_equal(res1, expected)
+    tm.assert_index_equal(res2, expected)
+    tm.assert_index_equal(res3, expected)
+    tm.assert_index_equal(res4, expected)
+
+
 @pytest.mark.parametrize("val", [pd.NA, 100])
 def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):
     # GH#48604
-    midx = MultiIndex.from_arrays(
-        [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
-    )
-    midx2 = MultiIndex.from_arrays(
-        [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
-    )
+    midx = MultiIndex.from_arrays([Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None])
+    midx2 = MultiIndex.from_arrays([Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]])
     result = midx.intersection(midx2)
     expected = MultiIndex.from_arrays([Series([2], dtype=any_numeric_ea_dtype), [1]])
     tm.assert_index_equal(result, expected)