From 68d7a11a3f301a37a713510de17b5089466786bc Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sat, 18 Jan 2020 15:47:16 +0800 Subject: [PATCH 1/6] BUG: concat not copying index and columns when copy=True (GH29879) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/reshape/concat.py | 5 ++++- pandas/tests/reshape/test_concat.py | 13 +++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index fa562838c8f7c..cf55ba431370d 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1133,6 +1133,7 @@ Reshaping - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) - Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) +- Bug in :func:`concat` index and columns not copied when ``copy=True`` (:issue:`29879`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 9528de36a3664..2c233ef8811fc 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -516,9 +516,12 @@ def _get_new_axes(self) -> List[Index]: def _get_comb_axis(self, i: int) -> Index: data_axis = self.objs[0]._get_block_manager_axis(i) - return get_objs_combined_axis( + comb_axis = get_objs_combined_axis( self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort ) + # GH 29879 + # No need for deep copy. Indexes are immutable so they can share underlying data + return comb_axis.copy(deep=False) if self.copy else comb_axis def _get_concat_axis(self) -> Index: """ diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index b3b2c5a05c6ad..5c15226e8fa31 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2750,3 +2750,16 @@ def test_concat_sparse(): ) result = pd.concat([a, a], axis=1) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_concat_copy_index(axis): + # GH 29879 + df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + df_comb = pd.concat([df, df], axis=axis, copy=True) + ser = df["a"] + ser_comb = pd.concat([ser, ser], axis=axis, copy=True) + + assert df_comb.index is not df.index + assert df_comb.columns is not df.columns + assert ser_comb.index is not ser.index From 1125589af45d14d49aaade551eff8cddb0cba860 Mon Sep 17 00:00:00 2001 From: Jiaxiang Date: Sat, 18 Jan 2020 23:11:29 +0800 Subject: [PATCH 2/6] Update pandas/tests/reshape/test_concat.py Co-Authored-By: Simon Hawkins --- pandas/tests/reshape/test_concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 5c15226e8fa31..173d00eea9a30 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2755,7 +2755,7 @@ def test_concat_sparse(): @pytest.mark.parametrize("axis", [0, 1]) def test_concat_copy_index(axis): # GH 29879 - df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) df_comb = pd.concat([df, df], axis=axis, copy=True) ser = df["a"] ser_comb = pd.concat([ser, ser], axis=axis, copy=True) From 71b6117426ba4b9f0b549715c6a82e99436dd1a1 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sat, 18 Jan 2020 23:29:58 +0800 Subject: [PATCH 3/6] BUG: updated tests and whatsnew (GH29879) --- doc/source/whatsnew/v1.0.0.rst | 1 - doc/source/whatsnew/v1.1.0.rst | 1 + pandas/tests/reshape/test_concat.py | 21 +++++++++++---------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index cf55ba431370d..fa562838c8f7c 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1133,7 +1133,6 @@ Reshaping - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) - Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) -- Bug in :func:`concat` index and columns not copied when ``copy=True`` (:issue:`29879`) Sparse ^^^^^^ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index b5a7b19f160a4..7219f09d1a292 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -142,6 +142,7 @@ Reshaping - - Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) +- Bug in :func:`concat` index and columns not copied when ``copy=True`` (:issue:`29879`) Sparse ^^^^^^ diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 173d00eea9a30..5811f3bc196a1 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2752,14 +2752,15 @@ def test_concat_sparse(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("axis", [0, 1]) -def test_concat_copy_index(axis): +@pytest.mark.parametrize("test_series", [True, False]) +def test_concat_copy_index(test_series, axis): # GH 29879 - df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) - df_comb = pd.concat([df, df], axis=axis, copy=True) - ser = df["a"] - ser_comb = pd.concat([ser, ser], axis=axis, copy=True) - - assert df_comb.index is not df.index - assert df_comb.columns is not df.columns - assert ser_comb.index is not ser.index + if test_series: + ser = Series([1, 2]) + comb = concat([ser, ser], axis=axis, copy=True) + assert comb.index is not ser.index + else: + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + comb = concat([df, df], axis=axis, copy=True) + assert comb.index is not df.index + assert comb.columns is not df.columns From 38c1a2f52cb784bb0e6e15e6538b9058433f8ed6 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sun, 19 Jan 2020 11:33:45 +0800 Subject: [PATCH 4/6] BUG: refactored code (GH29879) --- pandas/core/indexes/api.py | 19 ++++++++++++++++--- pandas/core/reshape/concat.py | 11 ++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 4072d06b9427c..40c0ebaa385ab 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -63,7 +63,7 @@ def get_objs_combined_axis( - objs, intersect: bool = False, axis=0, sort: bool = True + objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False ) -> Index: """ Extract combined index: return intersection or union (depending on the @@ -81,13 +81,15 @@ def get_objs_combined_axis( The axis to extract indexes from. sort : bool, default True Whether the result index should come out sorted or not. + copy : bool, default False + If True, return a copy of the combined index. Returns ------- Index """ obs_idxes = [obj._get_axis(axis) for obj in objs] - return _get_combined_index(obs_idxes, intersect=intersect, sort=sort) + return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy) def _get_distinct_objs(objs: List[Index]) -> List[Index]: @@ -105,7 +107,10 @@ def _get_distinct_objs(objs: List[Index]) -> List[Index]: def _get_combined_index( - indexes: List[Index], intersect: bool = False, sort: bool = False + indexes: List[Index], + intersect: bool = False, + sort: bool = False, + copy: bool = False, ) -> Index: """ Return the union or intersection of indexes. @@ -119,6 +124,8 @@ def _get_combined_index( calculate the union. sort : bool, default False Whether the result index should come out sorted or not. + copy : bool, default False + If True, return a copy of the combined index. Returns ------- @@ -143,6 +150,12 @@ def _get_combined_index( index = index.sort_values() except TypeError: pass + + # GH 29879 + if copy: + # No need for deep copy. Indexes are immutable so they can share underlying data + index = index.copy(deep=False) + return index diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 2c233ef8811fc..b42497b507e1f 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -516,12 +516,13 @@ def _get_new_axes(self) -> List[Index]: def _get_comb_axis(self, i: int) -> Index: data_axis = self.objs[0]._get_block_manager_axis(i) - comb_axis = get_objs_combined_axis( - self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort + return get_objs_combined_axis( + self.objs, + axis=data_axis, + intersect=self.intersect, + sort=self.sort, + copy=self.copy, ) - # GH 29879 - # No need for deep copy. Indexes are immutable so they can share underlying data - return comb_axis.copy(deep=False) if self.copy else comb_axis def _get_concat_axis(self) -> Index: """ From c5db0ccdba395ccc6ce4e21b09522d4471d32cc8 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Sun, 19 Jan 2020 11:40:44 +0800 Subject: [PATCH 5/6] BUG: updated details in a comment (GH29879) --- pandas/core/indexes/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 40c0ebaa385ab..c252bdbb7ca15 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -153,7 +153,8 @@ def _get_combined_index( # GH 29879 if copy: - # No need for deep copy. Indexes are immutable so they can share underlying data + # No need for deep copy. The underlying data of Indexes are + # considered immutable so they can be shared index = index.copy(deep=False) return index From 151535152b153d95626921b2dad071aca7170088 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 21 Jan 2020 09:45:13 +0800 Subject: [PATCH 6/6] CLN: cleaned up code and comment (GH29879) --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/indexes/api.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index aaba575a31fa6..2a6779d0f2118 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -144,7 +144,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) - Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`) - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`) -- Bug in :func:`concat` index and columns not copied when ``copy=True`` (:issue:`29879`) +- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) Sparse ^^^^^^ diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index c252bdbb7ca15..0a23d38ace37e 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -153,9 +153,7 @@ def _get_combined_index( # GH 29879 if copy: - # No need for deep copy. The underlying data of Indexes are - # considered immutable so they can be shared - index = index.copy(deep=False) + index = index.copy() return index