pandas-dev · taytzehao · Apr 26, 2021 · Apr 26, 2021 · Apr 26, 2021 · May 9, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -809,6 +809,7 @@ MultiIndex
 - Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when :class:`MultiIndex` containing ``NaN`` even when they are differently ordered (:issue:`38439`)
 - Bug in :meth:`MultiIndex.intersection` always returning empty when intersecting with :class:`CategoricalIndex` (:issue:`38653`)
 
+
 I/O
 ^^^
 
@@ -916,6 +917,8 @@ Reshaping
 - Bug in :func:`to_datetime` raising error when input sequence contains unhashable items (:issue:`39756`)
 - Bug in :meth:`Series.explode` preserving index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`)
 - Bug in :func:`to_datetime` raising ``ValueError`` when :class:`Series` contains ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`)
+- Bug in :func:`concat` not matching index names when concatenating two :class:`DataFrame` objects that each have a :class:`MultiIndex` (:issue:`40849`)
+
 
 Sparse
 ^^^^^^

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -26,6 +26,7 @@
     lib,
 )
 from pandas._libs.hashtable import duplicated
+from pandas._libs.missing import NA
 from pandas._typing import (
     AnyArrayLike,
     DtypeObj,
@@ -2144,7 +2145,7 @@ def take(
             levels=self.levels, codes=taken, names=self.names, verify_integrity=False
         )
 
-    def append(self, other):
+    def append(self, other, concat_indexes=False):
         """
         Append a collection of Index options together
 
@@ -2163,11 +2164,41 @@ def append(self, other):
             (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
         ):
             arrays = []
-            for i in range(self.nlevels):
-                label = self._get_level_values(i)
-                appended = [o._get_level_values(i) for o in other]
-                arrays.append(label.append(appended))
-            return MultiIndex.from_arrays(arrays, names=self.names)
+            if self.names.count(None) > 1 or any(
+                o.names.count(None) > 1 for o in other
+            ):
+
+                for i in range(self.nlevels):
+
+                    label = self._get_level_values(i)
+                    appended = [o._get_level_values(i) for o in other]
+                    arrays.append(label.append(appended))
+                    index_label_list = self.names
+
+            else:
+                index_label_list = self.get_unique_indexes(other)
+
+                for index_label in index_label_list:
+
+                    index = self.get_index_data(
+                        data_index=self, column_name=index_label, other=other
+                    )
+                    appended = []
+
+                    for o in other:
+
+                        data = self.get_index_data(
+                            data_index=o,
+                            column_name=index_label,
+                            other=other,
+                            search_self=True,
+                        )
+                        appended.append(data)
+
+                        index = index.append(data)
+
+                    arrays.append(index)
+            return MultiIndex.from_arrays(arrays, names=index_label_list)
 
         to_concat = (self._values,) + tuple(k._values for k in other)
         new_tuples = np.concatenate(to_concat)
@@ -2178,6 +2209,47 @@ def append(self, other):
         except (TypeError, IndexError):
             return Index(new_tuples)
 
+    def get_index_data(self, data_index, column_name, other, search_self=False):
+        """Return the ``column_name`` level of ``data_index``, or an all-NA Index."""
+        # data_index carries this level itself: hand back its values unchanged.
+        if column_name in data_index.names:
+            Index_position = data_index.names.index(column_name)
+            data = data_index._get_level_values(Index_position)
+            return data
+
+        else:
+            # data_index lacks this level: build an Index of data_index.size NA
+            # values whose dtype is copied from the matching level of an index
+            # that does have it. With search_self=True, self is checked first.
+            # NOTE(review): falls through and returns None if no index has it.
+            if search_self is True:
+                if column_name in self.names:
+                    Index_position = self.names.index(column_name)
+                    NA_type = self.levels[Index_position].dtype
+                    data = Index([NA] * data_index.size, dtype=NA_type)
+                    return data
+            # Otherwise use the first index in other, except data_index itself.
+            for o in other:
+                if o is not data_index and column_name in o.names:
+                    Index_position = o.names.index(column_name)
+                    NA_type = o.levels[Index_position].dtype
+                    data = Index([NA] * data_index.size, dtype=NA_type)
+                    return data
+
+    def get_unique_indexes(self, other):
+        """Return self.names followed by names in other that are not yet listed."""
+        Union_list = list(self.names)
+
+        for o in other:
+            if not set(o.names).issubset(Union_list):
+                # o has at least one new name; append new ones in o's order.
+                for element in o.names:
+                    if element not in Union_list:
+
+                        Union_list.append(element)
+
+        return Union_list
+
     def argsort(self, *args, **kwargs) -> np.ndarray:
         return self._values.argsort(*args, **kwargs)
 

diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -1333,3 +1333,40 @@ def test_maxmin(self, raw_data, max_expected, min_expected):
         min_result = SparseArray(raw_data).min()
         assert max_result in max_expected
         assert min_result in min_expected
+
+
+def test_concat_with_different_index_arrangement():
+    df_first = pd.DataFrame(
+        [["i1_top", "i2_top", 1]], columns=["index1", "index2", "value1"]
+    )
+    df_second = pd.DataFrame(
+        [["i1_middle", "i2_middle", 1]], columns=["index1", "index3", "value1"]
+    )
+    df_third = pd.DataFrame(
+        [["i1_bottom", "i2_bottom", 1]], columns=["index1", "index4", "value1"]
+    )
+    # First check: plain column concat, pd.NA expected where a column is absent.
+    df_concatenated_result = pd.concat(
+        [df_first, df_second, df_third], ignore_index=True
+    )
+    df_concatenated_expected = pd.DataFrame(
+        [
+            ["i1_top", "i2_top", 1, pd.NA, pd.NA],
+            ["i1_middle", pd.NA, 1, "i2_middle", pd.NA],
+            ["i1_bottom", pd.NA, 1, pd.NA, "i2_bottom"],
+        ],
+        columns=["index1", "index2", "value1", "index3", "index4"],
+    )
+    # NOTE(review): exercises pd.concat, not SparseArray - confirm test module.
+    tm.assert_frame_equal(df_concatenated_result, df_concatenated_expected)
+    # Second check: the same data as MultiIndexes with differing level order.
+    df_first.set_index(["index1", "index2"], inplace=True)
+    df_second.set_index(["index3", "index1"], inplace=True)
+    df_third.set_index(["index4", "index1"], inplace=True)
+    # Expected index levels are the union of the names, in first-seen order.
+    df_concatenated_result = pd.concat([df_first, df_second, df_third])
+    df_concatenated_expected.set_index(
+        ["index1", "index2", "index3", "index4"], inplace=True
+    )
+
+    tm.assert_frame_equal(df_concatenated_result, df_concatenated_expected)