From f8cf24e4483ec4f2719de7caf15796dfb5eddd3a Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 16:13:33 +0800 Subject: [PATCH 01/14] Update pandas concat multiindex --- doc/source/whatsnew/v1.3.0.rst | 2 ++ pandas/core/indexes/multi.py | 5 ++++- pandas/tests/arrays/sparse/test_array.py | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 31517e363140d..322d1d9daee0f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -769,6 +769,8 @@ MultiIndex - Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in result (:issue:`38623`) - Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when :class:`MultiIndex` containing ``NaN`` even when they are differently ordered (:issue:`38439`) - Bug in :meth:`MultiIndex.intersection` always returning empty when intersecting with :class:`CategoricalIndex` (:issue:`38653`) +- Bug in :meth:`DataFrame.concat` does not match index names when concatenating two dataframes with a multiindex (:issue:`40849`) + I/O ^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 794f13bbfb6b1..b1f7b3e417b15 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2165,7 +2165,10 @@ def append(self, other): arrays = [] for i in range(self.nlevels): label = self._get_level_values(i) - appended = [o._get_level_values(i) for o in other] + if label.names[0]: + appended = [o._get_level_values(o.names.index(label.names[0])) for o in other] + else: + appended = [o._get_level_values(i) for o in other] arrays.append(label.append(appended)) return MultiIndex.from_arrays(arrays, names=self.names) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index e073871f96bb4..308003cd05092 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1311,3 +1311,17 @@ def test_dropna(fill_value): df = pd.DataFrame({"a": [0, 1], "b": arr}) expected_df = pd.DataFrame({"a": [1], "b": exp}, index=pd.Int64Index([1])) tm.assert_equal(df.dropna(), expected_df) + + +def test_maxmin(): + data = np.arange(10).astype(float) + max_out = SparseArray(data).max() + min_out = SparseArray(data).min() + assert max_out == 9 + assert min_out == 0 + + data = data*(-1) + max_out = SparseArray(data).max() + min_out = SparseArray(data).min() + assert max_out == 0 + assert min_out == -9 From e5d2d06948613897a621d978ae841a93ff1f425e Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 16:21:19 +0800 Subject: [PATCH 02/14] Update pandas concat multiindex 2 --- pandas/core/indexes/multi.py | 3 ++- pandas/tests/arrays/sparse/test_array.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b1f7b3e417b15..b3658ed0a2ac5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2166,7 +2166,8 @@ def append(self, other): for i in range(self.nlevels): label = self._get_level_values(i) if label.names[0]: - appended = [o._get_level_values(o.names.index(label.names[0])) for o in other] + appended = [o._get_level_values + (o.names.index(label.names[0])) for o in other] else: appended = [o._get_level_values(i) for o in other] arrays.append(label.append(appended)) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 308003cd05092..c2c47daa374c5 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1320,7 +1320,7 @@ def test_maxmin(): assert max_out == 9 assert min_out == 0 - data = data*(-1) + data = data * (-1) max_out = SparseArray(data).max() min_out = SparseArray(data).min() assert max_out == 0 From 2387a62174ab96e43a62e5cc539bb0769f3e1cc4 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 26 Apr 2021 18:12:12 +0800 Subject: [PATCH 03/14] Update pandas concat multiindex 3 --- pandas/core/indexes/multi.py | 4 ++-- pandas/core/reshape/concat.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b3658ed0a2ac5..bea64126d1e6b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2144,7 +2144,7 @@ def take( levels=self.levels, codes=taken, names=self.names, verify_integrity=False ) - def append(self, other): + def append(self, other, concat_indexes = False): """ Append a collection of Index options together @@ -2165,7 +2165,7 @@ def append(self, other): arrays = [] for i in range(self.nlevels): label = self._get_level_values(i) - if label.names[0]: + if label.names[0] and concat_indexes == True: appended = [o._get_level_values (o.names.index(label.names[0])) for o in other] else: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index b3b453ea6355a..5dfc7553f16dd 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -611,7 +611,7 @@ def _maybe_check_integrity(self, concat_index: Index): def _concat_indexes(indexes) -> Index: - return indexes[0].append(indexes[1:]) + return indexes[0].append(indexes[1:], concat_indexes=True) def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex: From b03448674c4243088ee271661cd711593c35f73c Mon Sep 17 00:00:00 2001 From: taytzehao Date: Sun, 9 May 2021 10:15:11 +0800 Subject: [PATCH 04/14] Resolved test_sparse_array conflict --- doc/source/whatsnew/v1.3.0.rst | 3 ++- pandas/core/reshape/concat.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 26 ++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1024db33edf9c..dd6af0b1d7dd9 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -806,7 +806,6 @@ MultiIndex - Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in result (:issue:`38623`) - Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when :class:`MultiIndex` containing ``NaN`` even when they are differently ordered (:issue:`38439`) - Bug in :meth:`MultiIndex.intersection` always returning empty when intersecting with :class:`CategoricalIndex` (:issue:`38653`) -- Bug in :meth:`DataFrame.concat` does not match index names when concatenating two dataframes with a multiindex (:issue:`40849`) I/O @@ -915,6 +914,8 @@ Reshaping - Bug in :func:`to_datetime` raising error when input sequence contains unhashable items (:issue:`39756`) - Bug in :meth:`Series.explode` preserving index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`) - Bug in :func:`to_datetime` raising ``ValueError`` when :class:`Series` contains ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`) +- Bug in :meth:`DataFrame.concat` does not match index names when concatenating two dataframes with a multiindex (:issue:`40849`) + Sparse ^^^^^^ diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 5dfc7553f16dd..b3b453ea6355a 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -611,7 +611,7 @@ def _maybe_check_integrity(self, concat_index: Index): def _concat_indexes(indexes) -> Index: - return indexes[0].append(indexes[1:], concat_indexes=True) + return indexes[0].append(indexes[1:]) def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex: diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index a96e5b07b7f7e..17fa826bf0bee 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1333,3 +1333,29 @@ def test_maxmin(self, raw_data, max_expected, min_expected): min_result = SparseArray(raw_data).min() assert max_result in max_expected assert min_result in min_expected + + +def test_concat_with_different_index_arrangement(): + df_first = pd.DataFrame( + [["i1_top", "i2_top", 1]], columns=["index1", "index2", "value1"] + ) + + df_second = pd.DataFrame( + [["i1_middle", "i2_middle", 1]], columns=["index1", "index2", "value1"] + ) + + df_concatenated_result = pd.concat([df_first, df_second], ignore_index=True) + df_concatenated_expected = pd.DataFrame( + [["i1_top", "i2_top", 1], ["i1_middle", "i2_middle", 1]], + columns=["index1", "index2", "value1"], + ) + + tm.assert_frame_equal(df_concatenated_result, df_concatenated_expected) + + df_first.set_index(["index1", "index2"], inplace=True) + df_second.set_index(["index2", "index1"], inplace=True) + + df_concatenated_result = pd.concat([df_first, df_second]) + df_concatenated_expected.set_index(["index1", "index2"], inplace=True) + + tm.assert_frame_equal(df_concatenated_result, df_concatenated_expected) From 0637c0fd1baa0582f612ad5ffbaafa533836a7ba Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 10 May 2021 17:56:23 +0800 Subject: [PATCH 05/14] Added multiindex concatenation with different column --- pandas/core/indexes/multi.py | 75 ++++++++++++++++++++---- pandas/tests/arrays/sparse/test_array.py | 27 ++++++--- 2 files changed, 84 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1698309df06a5..d0cb79532fbdd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -68,6 +68,7 @@ isna, ) +from pandas import NA import pandas.core.algorithms as algos from pandas.core.arrays import Categorical from pandas.core.arrays.categorical import factorize_from_iterables @@ -2163,16 +2164,29 @@ def append(self, other, concat_indexes=False): (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other ): arrays = [] - for i in range(self.nlevels): - label = self._get_level_values(i) - if label.names[0] and concat_indexes is True: - appended = [ - o._get_level_values(o.names.index(label.name)) for o in other - ] - else: - appended = [o._get_level_values(i) for o in other] - arrays.append(label.append(appended)) - return MultiIndex.from_arrays(arrays, names=self.names) + index_label_list = self.get_unique_indexes(other) + + for index_label in index_label_list: + + index = self.get_index_data( + data_index=self, column_name=index_label, other=other + ) + appended = [] + + for o in other: + + data = self.get_index_data( + data_index=o, + column_name=index_label, + other=other, + search_self=True, + ) + appended.append(data) + + index = index.append(data) + + arrays.append(index) + return MultiIndex.from_arrays(arrays, names=index_label_list) to_concat = (self._values,) + tuple(k._values for k in other) new_tuples = np.concatenate(to_concat) @@ -2183,6 +2197,47 @@ def append(self, other, concat_indexes=False): except (TypeError, IndexError): return Index(new_tuples) + def get_index_data(self, data_index, column_name, other, search_self=False): + + # Returns original data if the data_index input has data for this column name + if column_name in data_index.names: + Index_position = data_index.names.index(column_name) + data = data_index._get_level_values(Index_position) + return data + + else: + + # If the data_index input is from other and if it don't + # have the column name, it returns an Index filled with pd.NA + # with data type that the other dataframe has the column. + if search_self is True: + if column_name in self.names: + Index_position = self.names.index(column_name) + NA_type = self.levels[Index_position].dtype + data = Index([NA] * data_index.size, dtype=NA_type) + return data + + for o in other: + if o is not data_index and column_name in o.names: + Index_position = o.names.index(column_name) + NA_type = o.levels[Index_position].dtype + data = Index([NA] * data_index.size, dtype=NA_type) + return data + + def get_unique_indexes(self, other): + + Union_list = list(self.names) + + for o in other: + if not set(o.names).issubset(Union_list): + + for element in o.names: + if element not in Union_list: + + Union_list.append(element) + + return Union_list + def argsort(self, *args, **kwargs) -> np.ndarray: return self._values.argsort(*args, **kwargs) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 17fa826bf0bee..02b6029227771 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1339,23 +1339,34 @@ def test_concat_with_different_index_arrangement(): df_first = pd.DataFrame( [["i1_top", "i2_top", 1]], columns=["index1", "index2", "value1"] ) - df_second = pd.DataFrame( - [["i1_middle", "i2_middle", 1]], columns=["index1", "index2", "value1"] + [["i1_middle", "i2_middle", 1]], columns=["index1", "index3", "value1"] + ) + df_third = pd.DataFrame( + [["i1_bottom", "i2_bottom", 1]], columns=["index1", "index4", "value1"] ) - df_concatenated_result = pd.concat([df_first, df_second], ignore_index=True) + df_concatenated_result = pd.concat( + [df_first, df_second, df_third], ignore_index=True + ) df_concatenated_expected = pd.DataFrame( - [["i1_top", "i2_top", 1], ["i1_middle", "i2_middle", 1]], - columns=["index1", "index2", "value1"], + [ + ["i1_top", "i2_top", 1, pd.NA, pd.NA], + ["i1_middle", pd.NA, 1, "i2_middle", pd.NA], + ["i1_bottom", pd.NA, 1, pd.NA, "i2_bottom"], + ], + columns=["index1", "index2", "value1", "index3", "index4"], ) tm.assert_frame_equal(df_concatenated_result, df_concatenated_expected) df_first.set_index(["index1", "index2"], inplace=True) - df_second.set_index(["index2", "index1"], inplace=True) + df_second.set_index(["index3", "index1"], inplace=True) + df_third.set_index(["index4", "index1"], inplace=True) - df_concatenated_result = pd.concat([df_first, df_second]) - df_concatenated_expected.set_index(["index1", "index2"], inplace=True) + df_concatenated_result = pd.concat([df_first, df_second, df_third]) + df_concatenated_expected.set_index( + ["index1", "index2", "index3", "index4"], inplace=True + ) tm.assert_frame_equal(df_concatenated_result, df_concatenated_expected) From d09ddbe5ab5feeba02a30e18404276f3cbad1568 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 10 May 2021 18:11:04 +0800 Subject: [PATCH 06/14] Added NA library --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d0cb79532fbdd..d970c42459d8e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -26,6 +26,7 @@ lib, ) from pandas._libs.hashtable import duplicated_int64 +from pandas._libs.missing import NA from pandas._typing import ( AnyArrayLike, DtypeObj, @@ -68,7 +69,6 @@ isna, ) -from pandas import NA import pandas.core.algorithms as algos from pandas.core.arrays import Categorical from pandas.core.arrays.categorical import factorize_from_iterables From 6056f879c75a16b25c9a993eedd723d9bdc2a51f Mon Sep 17 00:00:00 2001 From: taytzehao Date: Tue, 11 May 2021 10:38:18 +0800 Subject: [PATCH 07/14] Resolved bug for multiple None column --- pandas/core/indexes/multi.py | 42 +++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d970c42459d8e..b726c173b164d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2164,28 +2164,40 @@ def append(self, other, concat_indexes=False): (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other ): arrays = [] - index_label_list = self.get_unique_indexes(other) + if self.names.count(None) > 1 or any( + o.names.count(None) > 1 for o in other + ): - for index_label in index_label_list: + for i in range(self.nlevels): - index = self.get_index_data( - data_index=self, column_name=index_label, other=other - ) - appended = [] + label = self._get_level_values(i) + appended = [o._get_level_values(i) for o in other] + arrays.append(label.append(appended)) + index_label_list = self.names + + else: + index_label_list = self.get_unique_indexes(other) - for o in other: + for index_label in index_label_list: - data = self.get_index_data( - data_index=o, - column_name=index_label, - other=other, - search_self=True, + index = self.get_index_data( + data_index=self, column_name=index_label, other=other ) - appended.append(data) + appended = [] + + for o in other: + + data = self.get_index_data( + data_index=o, + column_name=index_label, + other=other, + search_self=True, + ) + appended.append(data) - index = index.append(data) + index = index.append(data) - arrays.append(index) + arrays.append(index) return MultiIndex.from_arrays(arrays, names=index_label_list) to_concat = (self._values,) + tuple(k._values for k in other) From a881d1767cab73573189b99a484d30ee2a00563f Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 24 May 2021 01:26:06 +0800 Subject: [PATCH 08/14] Addressed comments --- asv_bench/benchmarks/multiindex_object.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py index 25df5b0214959..17ded2ef2f9b1 100644 --- a/asv_bench/benchmarks/multiindex_object.py +++ b/asv_bench/benchmarks/multiindex_object.py @@ -232,4 +232,17 @@ def time_operation(self, index_structure, dtype, method): getattr(self.left, method)(self.right) +class Append: + def setup(self): + self.MI1 = MultiIndex.from_arrays( + [np.arange(10), np.arange(100, 200, 10)], names=["first", "second"] + ) + self.MI2 = MultiIndex.from_arrays( + [np.arange(10, 20, 1), np.arange(200, 300, 10)], names=["first", "third"] + ) + + def time_append(self): + self.MI1.append(self.MI2) + + from .pandas_vb_common import setup # noqa: F401 isort:skip From 6a03335bdab62077df68e6216779f78f8260a4f3 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 24 May 2021 01:39:14 +0800 Subject: [PATCH 09/14] Addressed comments --- pandas/core/indexes/multi.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f1eb5331979db..8138b78ec2815 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -45,7 +45,10 @@ doc, ) -from pandas.core.dtypes.cast import coerce_indexer_dtype +from pandas.core.dtypes.cast import ( + coerce_indexer_dtype, + convert_dtypes, +) from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, @@ -2146,7 +2149,7 @@ def take( levels=self.levels, codes=taken, names=self.names, verify_integrity=False ) - def append(self, other, concat_indexes=False): + def append(self, other): """ Append a collection of Index options together @@ -2169,12 +2172,7 @@ def append(self, other, concat_indexes=False): o.names.count(None) > 1 for o in other ): - for i in range(self.nlevels): - - label = self._get_level_values(i) - appended = [o._get_level_values(i) for o in other] - arrays.append(label.append(appended)) - index_label_list = self.names + arrays, index_label_list = self.simple_append(other=other) else: index_label_list = self.get_unique_indexes(other) @@ -2210,6 +2208,19 @@ def append(self, other, concat_indexes=False): except (TypeError, IndexError): return Index(new_tuples) + def simple_append(self, other): + + arr = [] + + for i in range(self.nlevels): + + label = self._get_level_values(i) + appended = [o._get_level_values(i) for o in other] + arr.append(label.append(appended)) + index_label_list = self.names + + return arr, index_label_list + def get_index_data(self, data_index, column_name, other, search_self=False): # Returns original data if the data_index input has data for this column name @@ -2226,14 +2237,14 @@ def get_index_data(self, data_index, column_name, other, search_self=False): if search_self is True: if column_name in self.names: Index_position = self.names.index(column_name) - NA_type = self.levels[Index_position].dtype + NA_type = convert_dtypes(self.levels[Index_position]) data = Index([NA] * data_index.size, dtype=NA_type) return data for o in other: if o is not data_index and column_name in o.names: Index_position = o.names.index(column_name) - NA_type = o.levels[Index_position].dtype + NA_type = convert_dtypes(o.levels[Index_position].dtype) data = Index([NA] * data_index.size, dtype=NA_type) return data From dc3e10e158165b8c5ab2fc1ba607416a5b55ca05 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 24 May 2021 11:08:50 +0800 Subject: [PATCH 10/14] Removed single index test --- pandas/core/indexes/multi.py | 4 ++-- pandas/tests/arrays/sparse/conftest.py | 0 pandas/tests/arrays/sparse/test_array.py | 6 +----- 3 files changed, 3 insertions(+), 7 deletions(-) create mode 100644 pandas/tests/arrays/sparse/conftest.py diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8138b78ec2815..351e30ed16e25 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2237,14 +2237,14 @@ def get_index_data(self, data_index, column_name, other, search_self=False): if search_self is True: if column_name in self.names: Index_position = self.names.index(column_name) - NA_type = convert_dtypes(self.levels[Index_position]) + NA_type = convert_dtypes(self) data = Index([NA] * data_index.size, dtype=NA_type) return data for o in other: if o is not data_index and column_name in o.names: Index_position = o.names.index(column_name) - NA_type = convert_dtypes(o.levels[Index_position].dtype) + NA_type = convert_dtypes(o) data = Index([NA] * data_index.size, dtype=NA_type) return data diff --git a/pandas/tests/arrays/sparse/conftest.py b/pandas/tests/arrays/sparse/conftest.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 9de72e85ad48b..510d410427493 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1354,9 +1354,7 @@ def test_concat_with_different_index_arrangement(): [["i1_bottom", "i2_bottom", 1]], columns=["index1", "index4", "value1"] ) - df_concatenated_result = pd.concat( - [df_first, df_second, df_third], ignore_index=True - ) + df_concatenated_result = pd.concat([df_first, df_second, df_third]) df_concatenated_expected = pd.DataFrame( [ ["i1_top", "i2_top", 1, pd.NA, pd.NA], @@ -1366,8 +1364,6 @@ def test_concat_with_different_index_arrangement(): columns=["index1", "index2", "value1", "index3", "index4"], ) - tm.assert_frame_equal(df_concatenated_result, df_concatenated_expected) - df_first.set_index(["index1", "index2"], inplace=True) df_second.set_index(["index3", "index1"], inplace=True) df_third.set_index(["index4", "index1"], inplace=True) From 8ee98650f9bcbd920fff56039f7948720eb9bd8c Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 24 May 2021 11:11:45 +0800 Subject: [PATCH 11/14] Removed single index test --- pandas/tests/arrays/sparse/test_array.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 510d410427493..2d2c656de4960 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1354,7 +1354,6 @@ def test_concat_with_different_index_arrangement(): [["i1_bottom", "i2_bottom", 1]], columns=["index1", "index4", "value1"] ) - df_concatenated_result = pd.concat([df_first, df_second, df_third]) df_concatenated_expected = pd.DataFrame( [ ["i1_top", "i2_top", 1, pd.NA, pd.NA], From a714d0fed50045043f75cc380527ab74505a00cf Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 24 May 2021 20:37:52 +0800 Subject: [PATCH 12/14] Delete conftest --- pandas/tests/arrays/sparse/conftest.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 pandas/tests/arrays/sparse/conftest.py diff --git a/pandas/tests/arrays/sparse/conftest.py b/pandas/tests/arrays/sparse/conftest.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 From 77f53ae64054dc02055fafbf6d589be8520de4cf Mon Sep 17 00:00:00 2001 From: taytzehao Date: Mon, 24 May 2021 23:09:11 +0800 Subject: [PATCH 13/14] Resolve CI error --- pandas/core/indexes/multi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 351e30ed16e25..d206dffc059c5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2237,14 +2237,14 @@ def get_index_data(self, data_index, column_name, other, search_self=False): if search_self is True: if column_name in self.names: Index_position = self.names.index(column_name) - NA_type = convert_dtypes(self) + NA_type = convert_dtypes(self._get_level_values(Index_position)) data = Index([NA] * data_index.size, dtype=NA_type) return data for o in other: if o is not data_index and column_name in o.names: Index_position = o.names.index(column_name) - NA_type = convert_dtypes(o) + NA_type = convert_dtypes(o._get_level_values(Index_position)) data = Index([NA] * data_index.size, dtype=NA_type) return data From fbfbb9bde4353edd2a0cbedfb262ffac61f1c47a Mon Sep 17 00:00:00 2001 From: taytzehao Date: Tue, 25 May 2021 07:40:18 +0800 Subject: [PATCH 14/14] Resolve CI issue --- pandas/core/indexes/multi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d206dffc059c5..427ac310bcf8b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2237,14 +2237,14 @@ def get_index_data(self, data_index, column_name, other, search_self=False): if search_self is True: if column_name in self.names: Index_position = self.names.index(column_name) - NA_type = convert_dtypes(self._get_level_values(Index_position)) + NA_type = convert_dtypes(self.levels[Index_position].values) data = Index([NA] * data_index.size, dtype=NA_type) return data for o in other: if o is not data_index and column_name in o.names: Index_position = o.names.index(column_name) - NA_type = convert_dtypes(o._get_level_values(Index_position)) + NA_type = convert_dtypes(self.levels[Index_position].values) data = Index([NA] * data_index.size, dtype=NA_type) return data