From 869df9ed13afda995fa20e3bfb1294294658eb45 Mon Sep 17 00:00:00 2001
From: jmarin <javier.marin@satellogic.com>
Date: Mon, 26 Feb 2024 23:15:18 +0100
Subject: [PATCH 1/8] Ensure dataframe preserves categorical index in
 constructor with categorical series

---
 pandas/core/frame.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e5d424b15e69e..9ebdc230bc9f8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -808,6 +808,21 @@ def __init__(
             if len(data) > 0:
                 if is_dataclass(data[0]):
                     data = dataclasses_to_dicts(data)
+
+                # Check if all elements in data are Series with categorical indices
+                if all(
+                    isinstance(item, Series)
+                    and isinstance(item.index, pandas.CategoricalIndex)
+                    for item in data
+                ):
+                    all_categorical = True
+                    # Combine all categories
+                    categories = pandas.CategoricalIndex(
+                        np.unique(np.concatenate([s.index.categories for s in data]))
+                    )
+                else:
+                    all_categorical = False
+
                 if not isinstance(data, np.ndarray) and treat_as_nested(data):
                     # exclude ndarray as we may have cast it a few lines above
                     if columns is not None:
@@ -820,6 +835,13 @@ def __init__(
                         index,  # type: ignore[arg-type]
                         dtype,
                     )
+
+                    if all_categorical:
+                        # Ensure columns are CategoricalIndex
+                        columns = pandas.CategoricalIndex(
+                            columns, categories=categories, ordered=True
+                        )
+
                     mgr = arrays_to_mgr(
                         arrays,
                         columns,

From 2e4fb2f94677d6651cc10b11e34881198a5be9e0 Mon Sep 17 00:00:00 2001
From: jmarin <javier.marin@satellogic.com>
Date: Sat, 2 Mar 2024 01:10:00 +0100
Subject: [PATCH 2/8] Modify union to properly handle CategoricalIndex

---
 pandas/core/frame.py                    | 20 --------------------
 pandas/core/indexes/base.py             | 13 ++++++++++++-
 pandas/tests/frame/test_constructors.py |  2 +-
 3 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9ebdc230bc9f8..fa29af99c9cce 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -809,20 +809,6 @@ def __init__(
                 if is_dataclass(data[0]):
                     data = dataclasses_to_dicts(data)
 
-                # Check if all elements in data are Series with categorical indices
-                if all(
-                    isinstance(item, Series)
-                    and isinstance(item.index, pandas.CategoricalIndex)
-                    for item in data
-                ):
-                    all_categorical = True
-                    # Combine all categories
-                    categories = pandas.CategoricalIndex(
-                        np.unique(np.concatenate([s.index.categories for s in data]))
-                    )
-                else:
-                    all_categorical = False
-
                 if not isinstance(data, np.ndarray) and treat_as_nested(data):
                     # exclude ndarray as we may have cast it a few lines above
                     if columns is not None:
@@ -836,12 +822,6 @@ def __init__(
                         dtype,
                     )
 
-                    if all_categorical:
-                        # Ensure columns are CategoricalIndex
-                        columns = pandas.CategoricalIndex(
-                            columns, categories=categories, ordered=True
-                        )
-
                     mgr = arrays_to_mgr(
                         arrays,
                         columns,
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index c17e01b85fa84..bd497203c9f02 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -212,6 +212,7 @@
         PeriodArray,
     )
 
+
 __all__ = ["Index"]
 
 _unsortable_types = frozenset(("mixed", "mixed-integer"))
@@ -2912,7 +2913,17 @@ def union(self, other, sort=None):
         self._assert_can_do_setop(other)
         other, result_name = self._convert_can_do_setop(other)
 
-        if self.dtype != other.dtype:
+        if isinstance(self.dtype, CategoricalDtype) and isinstance(
+            other.dtype, CategoricalDtype
+        ):
+            # Unite both categories
+            both_categories = np.union1d(self.categories, other.categories)
+
+            # Convert both indexes to have the same categories
+            self = self.set_categories(both_categories)
+            other = other.set_categories(both_categories)
+
+        elif self.dtype != other.dtype:
             if (
                 isinstance(self, ABCMultiIndex)
                 and not is_object_dtype(_unpack_nested_dtype(other))
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 7d1a5b4492740..20c940ad0c6dd 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2349,7 +2349,7 @@ def test_construct_with_two_categoricalindex_series(self):
         result = DataFrame([s1, s2])
         expected = DataFrame(
             np.array([[39, 6, 4, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]),
-            columns=["female", "male", "unknown", "f", "m"],
+            columns=CategoricalIndex(["female", "male", "unknown", "f", "m"]),
         )
         tm.assert_frame_equal(result, expected)
 

From 4ebc935dbe4f3a5cba4d6ecc8bfc43feec893654 Mon Sep 17 00:00:00 2001
From: jmarin <javier.marin@satellogic.com>
Date: Sat, 2 Mar 2024 01:10:00 +0100
Subject: [PATCH 3/8] Modify union to properly handle CategoricalIndex

---
 pandas/core/frame.py                       | 20 --------------------
 pandas/core/indexes/base.py                | 16 ++++++++++++++--
 pandas/tests/frame/test_constructors.py    |  2 +-
 pandas/tests/groupby/test_categorical.py   |  2 +-
 pandas/tests/indexes/test_setops.py        |  4 ++--
 pandas/tests/reshape/concat/test_append.py |  2 ++
 6 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9ebdc230bc9f8..fa29af99c9cce 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -809,20 +809,6 @@ def __init__(
                 if is_dataclass(data[0]):
                     data = dataclasses_to_dicts(data)
 
-                # Check if all elements in data are Series with categorical indices
-                if all(
-                    isinstance(item, Series)
-                    and isinstance(item.index, pandas.CategoricalIndex)
-                    for item in data
-                ):
-                    all_categorical = True
-                    # Combine all categories
-                    categories = pandas.CategoricalIndex(
-                        np.unique(np.concatenate([s.index.categories for s in data]))
-                    )
-                else:
-                    all_categorical = False
-
                 if not isinstance(data, np.ndarray) and treat_as_nested(data):
                     # exclude ndarray as we may have cast it a few lines above
                     if columns is not None:
@@ -836,12 +822,6 @@ def __init__(
                         dtype,
                     )
 
-                    if all_categorical:
-                        # Ensure columns are CategoricalIndex
-                        columns = pandas.CategoricalIndex(
-                            columns, categories=categories, ordered=True
-                        )
-
                     mgr = arrays_to_mgr(
                         arrays,
                         columns,
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index c17e01b85fa84..f446f3a55bd0f 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -212,6 +212,7 @@
         PeriodArray,
     )
 
+
 __all__ = ["Index"]
 
 _unsortable_types = frozenset(("mixed", "mixed-integer"))
@@ -2912,7 +2913,18 @@ def union(self, other, sort=None):
         self._assert_can_do_setop(other)
         other, result_name = self._convert_can_do_setop(other)
 
-        if self.dtype != other.dtype:
+        if isinstance(self.dtype, CategoricalDtype) and isinstance(
+            other.dtype, CategoricalDtype
+        ):
+            # Unite both categories
+            both_categories = np.union1d(self.categories, other.categories)
+            # if ordered and unordered, we set categories to be unordered
+            ordered = False if self.ordered != other.ordered else None
+            # Convert both indexes to have the same categories
+            self = self.set_categories(both_categories, ordered=ordered)
+            other = other.set_categories(both_categories, ordered=ordered)
+
+        elif self.dtype != other.dtype:
             if (
                 isinstance(self, ABCMultiIndex)
                 and not is_object_dtype(_unpack_nested_dtype(other))
@@ -3006,7 +3018,7 @@ def _union(self, other: Index, sort: bool | None):
         else:
             missing = algos.unique1d(self.get_indexer_non_unique(other)[1])
 
-        result: Index | MultiIndex | ArrayLike
+        result: Index | MultiIndex | CategoricalIndex | ArrayLike
         if self._is_multi:
             # Preserve MultiIndex to avoid losing dtypes
             result = self.append(other.take(missing))
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 7d1a5b4492740..20c940ad0c6dd 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2349,7 +2349,7 @@ def test_construct_with_two_categoricalindex_series(self):
         result = DataFrame([s1, s2])
         expected = DataFrame(
             np.array([[39, 6, 4, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]),
-            columns=["female", "male", "unknown", "f", "m"],
+            columns=CategoricalIndex(["female", "male", "unknown", "f", "m"]),
         )
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 10eca5ea8427f..c36e100aa0a0f 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -714,7 +714,7 @@ def test_describe_categorical_columns():
     # GH 11558
     cats = CategoricalIndex(
         ["qux", "foo", "baz", "bar"],
-        categories=["foo", "bar", "baz", "qux"],
+        categories=["bar", "baz", "foo", "qux"],
         ordered=True,
     )
     df = DataFrame(np.random.default_rng(2).standard_normal((20, 4)), columns=cats)
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
index 27b54ea66f0ac..6fae11781e623 100644
--- a/pandas/tests/indexes/test_setops.py
+++ b/pandas/tests/indexes/test_setops.py
@@ -573,7 +573,7 @@ def test_union_duplicate_index_subsets_of_each_other(
 
     expected = Index([1, 2, 2, 3, 3, 4], dtype=dtype)
     if isinstance(a, CategoricalIndex):
-        expected = Index([1, 2, 2, 3, 3, 4])
+        expected = CategoricalIndex([1, 2, 2, 3, 3, 4])
     result = a.union(b)
     tm.assert_index_equal(result, expected)
     result = a.union(b, sort=False)
@@ -670,7 +670,7 @@ def test_union_with_duplicate_index_not_subset_and_non_monotonic(
     b = Index([0, 0, 1], dtype=dtype)
     expected = Index([0, 0, 1, 2], dtype=dtype)
     if isinstance(a, CategoricalIndex):
-        expected = Index([0, 0, 1, 2])
+        expected = CategoricalIndex([0, 0, 1, 2])
 
     result = a.union(b)
     tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py
index 3fb6a3fb61396..96ca06e1d16a4 100644
--- a/pandas/tests/reshape/concat/test_append.py
+++ b/pandas/tests/reshape/concat/test_append.py
@@ -234,6 +234,8 @@ def test_append_different_columns_types(self, df_columns, series_index):
         result = df._append(ser)
         idx_diff = ser.index.difference(df_columns)
         combined_columns = Index(df_columns.tolist()).append(idx_diff)
+        if isinstance(result.columns, pd.CategoricalIndex):
+            combined_columns = pd.CategoricalIndex(combined_columns)
         expected = DataFrame(
             [
                 [1.0, 2.0, 3.0, np.nan, np.nan, np.nan],

From 8afb17285bb36c1feef186a81044c7e1dd02918b Mon Sep 17 00:00:00 2001
From: jmarin <javier.marin@satellogic.com>
Date: Sun, 3 Mar 2024 21:08:51 +0100
Subject: [PATCH 4/8] Handle all cases properly and adapt tests accordingly

---
 pandas/core/frame.py                     | 2 --
 pandas/core/indexes/base.py              | 8 ++++++--
 pandas/tests/groupby/test_categorical.py | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index fa29af99c9cce..e5d424b15e69e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -808,7 +808,6 @@ def __init__(
             if len(data) > 0:
                 if is_dataclass(data[0]):
                     data = dataclasses_to_dicts(data)
-
                 if not isinstance(data, np.ndarray) and treat_as_nested(data):
                     # exclude ndarray as we may have cast it a few lines above
                     if columns is not None:
@@ -821,7 +820,6 @@ def __init__(
                         index,  # type: ignore[arg-type]
                         dtype,
                     )
-
                     mgr = arrays_to_mgr(
                         arrays,
                         columns,
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index f446f3a55bd0f..ab2d3867992d4 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2916,8 +2916,12 @@ def union(self, other, sort=None):
         if isinstance(self.dtype, CategoricalDtype) and isinstance(
             other.dtype, CategoricalDtype
         ):
-            # Unite both categories
-            both_categories = np.union1d(self.categories, other.categories)
+            both_categories = self.categories
+            if len(self.categories) != len(other.categories) or any(
+                self.categories != other.categories
+            ):
+                # Unite both categories
+                both_categories = np.union1d(self.categories, other.categories)
             # if ordered and unordered, we set categories to be unordered
             ordered = False if self.ordered != other.ordered else None
             # Convert both indexes to have the same categories
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index c36e100aa0a0f..10eca5ea8427f 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -714,7 +714,7 @@ def test_describe_categorical_columns():
     # GH 11558
     cats = CategoricalIndex(
         ["qux", "foo", "baz", "bar"],
-        categories=["bar", "baz", "foo", "qux"],
+        categories=["foo", "bar", "baz", "qux"],
         ordered=True,
     )
     df = DataFrame(np.random.default_rng(2).standard_normal((20, 4)), columns=cats)

From 6a0b1af4799977e5e37412f99c49af17428c253e Mon Sep 17 00:00:00 2001
From: jmarin <javier.marin@satellogic.com>
Date: Sun, 3 Mar 2024 22:23:48 +0100
Subject: [PATCH 5/8] Add type: ignore[attr-defined] when self and other
 are CategoricalIndex

---
 pandas/core/indexes/base.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ab2d3867992d4..1045c41824e0e 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2916,16 +2916,16 @@ def union(self, other, sort=None):
         if isinstance(self.dtype, CategoricalDtype) and isinstance(
             other.dtype, CategoricalDtype
         ):
-            both_categories = self.categories
-            if len(self.categories) != len(other.categories) or any(
-                self.categories != other.categories
+            both_categories = self.categories  # type: ignore[attr-defined]
+            if len(self.categories) != len(other.categories) or any(  # type: ignore[attr-defined]
+                self.categories != other.categories  # type: ignore[attr-defined]
             ):
                 # Unite both categories
-                both_categories = np.union1d(self.categories, other.categories)
+                both_categories = np.union1d(self.categories, other.categories)  # type: ignore[attr-defined]
             # if ordered and unordered, we set categories to be unordered
-            ordered = False if self.ordered != other.ordered else None
+            ordered = False if self.ordered != other.ordered else None  # type: ignore[attr-defined]
             # Convert both indexes to have the same categories
-            self = self.set_categories(both_categories, ordered=ordered)
+            self = self.set_categories(both_categories, ordered=ordered)  # type: ignore[attr-defined]
             other = other.set_categories(both_categories, ordered=ordered)
 
         elif self.dtype != other.dtype:

From f5e4148e53b9d5f01bc56f78a8091a6bc2627df5 Mon Sep 17 00:00:00 2001
From: jmarin <javier.marin@satellogic.com>
Date: Wed, 6 Mar 2024 22:02:08 +0100
Subject: [PATCH 6/8] Use union_categoricals instead of union1d from numpy

---
 pandas/core/indexes/base.py | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 1045c41824e0e..0b248107f0be2 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -112,7 +112,10 @@
     pandas_dtype,
     validate_all_hashable,
 )
-from pandas.core.dtypes.concat import concat_compat
+from pandas.core.dtypes.concat import (
+    concat_compat,
+    union_categoricals,
+)
 from pandas.core.dtypes.dtypes import (
     ArrowDtype,
     CategoricalDtype,
@@ -2913,20 +2916,27 @@ def union(self, other, sort=None):
         self._assert_can_do_setop(other)
         other, result_name = self._convert_can_do_setop(other)
 
-        if isinstance(self.dtype, CategoricalDtype) and isinstance(
-            other.dtype, CategoricalDtype
-        ):
-            both_categories = self.categories  # type: ignore[attr-defined]
-            if len(self.categories) != len(other.categories) or any(  # type: ignore[attr-defined]
-                self.categories != other.categories  # type: ignore[attr-defined]
-            ):
-                # Unite both categories
-                both_categories = np.union1d(self.categories, other.categories)  # type: ignore[attr-defined]
+        from pandas import CategoricalIndex
+
+        if isinstance(self, CategoricalIndex) and isinstance(other, CategoricalIndex):
+            both_categories = self.categories
             # if ordered and unordered, we set categories to be unordered
-            ordered = False if self.ordered != other.ordered else None  # type: ignore[attr-defined]
+            ordered = False if self.ordered != other.ordered else None
+            if len(self.categories) != len(other.categories) or any(
+                self.categories != other.categories
+            ):
+                if ordered is False:
+                    both_categories = union_categoricals(
+                        [self.as_unordered(), other.as_unordered()],  # type: ignore[attr-defined]
+                        sort_categories=True,
+                    ).categories
+                else:
+                    both_categories = union_categoricals(
+                        [self, other], sort_categories=True
+                    ).categories
             # Convert both indexes to have the same categories
             self = self.set_categories(both_categories, ordered=ordered)  # type: ignore[attr-defined]
-            other = other.set_categories(both_categories, ordered=ordered)
+            other = other.set_categories(both_categories, ordered=ordered)  # type: ignore[attr-defined]
 
         elif self.dtype != other.dtype:
             if (

From cb3e6b697ec7078e3cbc8834d4a8999056827529 Mon Sep 17 00:00:00 2001
From: jmarin <javier.marin@satellogic.com>
Date: Thu, 7 Mar 2024 23:30:06 +0100
Subject: [PATCH 7/8] Change from CategoricalIndex to CategoricalDtype check

---
 pandas/core/indexes/base.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 0b248107f0be2..d2783e2c7ef8c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2916,14 +2916,14 @@ def union(self, other, sort=None):
         self._assert_can_do_setop(other)
         other, result_name = self._convert_can_do_setop(other)
 
-        from pandas import CategoricalIndex
-
-        if isinstance(self, CategoricalIndex) and isinstance(other, CategoricalIndex):
-            both_categories = self.categories
+        if isinstance(self.dtype, CategoricalDtype) and isinstance(
+            other.dtype, CategoricalDtype
+        ):
+            both_categories = self.categories  # type: ignore[attr-defined]
             # if ordered and unordered, we set categories to be unordered
-            ordered = False if self.ordered != other.ordered else None
-            if len(self.categories) != len(other.categories) or any(
-                self.categories != other.categories
+            ordered = False if self.ordered != other.ordered else None  # type: ignore[attr-defined]
+            if len(self.categories) != len(other.categories) or any(  # type: ignore[attr-defined]
+                self.categories != other.categories  # type: ignore[attr-defined]
             ):
                 if ordered is False:
                     both_categories = union_categoricals(
@@ -2936,7 +2936,7 @@ def union(self, other, sort=None):
                     ).categories
             # Convert both indexes to have the same categories
             self = self.set_categories(both_categories, ordered=ordered)  # type: ignore[attr-defined]
-            other = other.set_categories(both_categories, ordered=ordered)  # type: ignore[attr-defined]
+            other = other.set_categories(both_categories, ordered=ordered)
 
         elif self.dtype != other.dtype:
             if (

From b332805a2c25568580bb6c47c074aa34fc6e7cb0 Mon Sep 17 00:00:00 2001
From: jmarin <javier.marin@satellogic.com>
Date: Wed, 27 Mar 2024 23:22:36 +0100
Subject: [PATCH 8/8] Handle CategoricalIndex union inside the original
 dtype-mismatch conditional

---
 pandas/core/indexes/base.py | 41 ++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 33a04944655c6..5be2319088f9a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2916,15 +2916,23 @@ def union(self, other, sort=None):
         self._assert_can_do_setop(other)
         other, result_name = self._convert_can_do_setop(other)
 
-        if isinstance(self.dtype, CategoricalDtype) and isinstance(
-            other.dtype, CategoricalDtype
-        ):
-            both_categories = self.categories  # type: ignore[attr-defined]
-            # if ordered and unordered, we set categories to be unordered
-            ordered = False if self.ordered != other.ordered else None  # type: ignore[attr-defined]
-            if len(self.categories) != len(other.categories) or any(  # type: ignore[attr-defined]
-                self.categories != other.categories  # type: ignore[attr-defined]
+        if self.dtype != other.dtype:
+            if (
+                isinstance(self, ABCMultiIndex)
+                and not is_object_dtype(_unpack_nested_dtype(other))
+                and len(other) > 0
+            ):
+                raise NotImplementedError(
+                    "Can only union MultiIndex with MultiIndex or Index of tuples, "
+                    "try mi.to_flat_index().union(other) instead."
+                )
+
+            if isinstance(self, ABCCategoricalIndex) and isinstance(
+                other, ABCCategoricalIndex
             ):
+                both_categories = self.categories
+                # if ordered and unordered, we set categories to be unordered
+                ordered = False if self.ordered != other.ordered else None
                 if ordered is False:
                     both_categories = union_categoricals(
                         [self.as_unordered(), other.as_unordered()],  # type: ignore[attr-defined]
@@ -2934,20 +2942,11 @@ def union(self, other, sort=None):
                     both_categories = union_categoricals(
                         [self, other], sort_categories=True
                     ).categories
-            # Convert both indexes to have the same categories
-            self = self.set_categories(both_categories, ordered=ordered)  # type: ignore[attr-defined]
-            other = other.set_categories(both_categories, ordered=ordered)
+                # Convert both indexes to have the same categories
+                self = self.set_categories(both_categories, ordered=ordered)  # type: ignore[attr-defined]
+                other = other.set_categories(both_categories, ordered=ordered)  # type: ignore[attr-defined]
+                return self.union(other, sort=sort)
 
-        elif self.dtype != other.dtype:
-            if (
-                isinstance(self, ABCMultiIndex)
-                and not is_object_dtype(_unpack_nested_dtype(other))
-                and len(other) > 0
-            ):
-                raise NotImplementedError(
-                    "Can only union MultiIndex with MultiIndex or Index of tuples, "
-                    "try mi.to_flat_index().union(other) instead."
-                )
             self, other = self._dti_setop_align_tzs(other, "union")
 
             dtype = self._find_common_type_compat(other)