diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a04ba157ce0ae..8cbc95f0349cf 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -156,7 +156,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` when ``margin`` is ``True`` and only ``column`` is defined (:issue:`31016`) - Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`) - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`) - +- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) Sparse ^^^^^^ diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 4072d06b9427c..0a23d38ace37e 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -63,7 +63,7 @@ def get_objs_combined_axis( - objs, intersect: bool = False, axis=0, sort: bool = True + objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False ) -> Index: """ Extract combined index: return intersection or union (depending on the @@ -81,13 +81,15 @@ def get_objs_combined_axis( The axis to extract indexes from. sort : bool, default True Whether the result index should come out sorted or not. + copy : bool, default False + If True, return a copy of the combined index. Returns ------- Index """ obs_idxes = [obj._get_axis(axis) for obj in objs] - return _get_combined_index(obs_idxes, intersect=intersect, sort=sort) + return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy) def _get_distinct_objs(objs: List[Index]) -> List[Index]: @@ -105,7 +107,10 @@ def _get_distinct_objs(objs: List[Index]) -> List[Index]: def _get_combined_index( - indexes: List[Index], intersect: bool = False, sort: bool = False + indexes: List[Index], + intersect: bool = False, + sort: bool = False, + copy: bool = False, ) -> Index: """ Return the union or intersection of indexes. @@ -119,6 +124,8 @@ def _get_combined_index( calculate the union. sort : bool, default False Whether the result index should come out sorted or not. + copy : bool, default False + If True, return a copy of the combined index. Returns ------- @@ -143,6 +150,11 @@ def _get_combined_index( index = index.sort_values() except TypeError: pass + + # GH 29879 + if copy: + index = index.copy() + return index diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 9528de36a3664..b42497b507e1f 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -517,7 +517,11 @@ def _get_new_axes(self) -> List[Index]: def _get_comb_axis(self, i: int) -> Index: data_axis = self.objs[0]._get_block_manager_axis(i) return get_objs_combined_axis( - self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort + self.objs, + axis=data_axis, + intersect=self.intersect, + sort=self.sort, + copy=self.copy, ) def _get_concat_axis(self) -> Index: diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index b3b2c5a05c6ad..5811f3bc196a1 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2750,3 +2750,17 @@ def test_concat_sparse(): ) result = pd.concat([a, a], axis=1) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +def test_concat_copy_index(test_series, axis): + # GH 29879 + if test_series: + ser = Series([1, 2]) + comb = concat([ser, ser], axis=axis, copy=True) + assert comb.index is not ser.index + else: + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + comb = concat([df, df], axis=axis, copy=True) + assert comb.index is not df.index + assert comb.columns is not df.columns