Skip to content

BUG: concat not copying index and columns when copy=True #31119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 21, 2020
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ Reshaping
- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`)
- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
- Bug in :func:`crosstab` where the output kept a dummy MultiIndex as columns when the inputs were two Series with tuple names (:issue:`18321`)

- Bug in :func:`concat` where the index and columns of the result were not copied when ``copy=True`` (:issue:`29879`)

Sparse
^^^^^^
Expand Down
20 changes: 17 additions & 3 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@


def get_objs_combined_axis(
objs, intersect: bool = False, axis=0, sort: bool = True
objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False
) -> Index:
"""
Extract combined index: return intersection or union (depending on the
Expand All @@ -81,13 +81,15 @@ def get_objs_combined_axis(
The axis to extract indexes from.
sort : bool, default True
Whether the result index should come out sorted or not.
copy : bool, default False
If True, return a copy of the combined index.

Returns
-------
Index
"""
obs_idxes = [obj._get_axis(axis) for obj in objs]
return _get_combined_index(obs_idxes, intersect=intersect, sort=sort)
return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy)


def _get_distinct_objs(objs: List[Index]) -> List[Index]:
Expand All @@ -105,7 +107,10 @@ def _get_distinct_objs(objs: List[Index]) -> List[Index]:


def _get_combined_index(
indexes: List[Index], intersect: bool = False, sort: bool = False
indexes: List[Index],
intersect: bool = False,
sort: bool = False,
copy: bool = False,
) -> Index:
"""
Return the union or intersection of indexes.
Expand All @@ -119,6 +124,8 @@ def _get_combined_index(
calculate the union.
sort : bool, default False
Whether the result index should come out sorted or not.
copy : bool, default False
If True, return a copy of the combined index.

Returns
-------
Expand All @@ -143,6 +150,13 @@ def _get_combined_index(
index = index.sort_values()
except TypeError:
pass

# GH 29879
if copy:
# No need for deep copy. The underlying data of Indexes are
# considered immutable so they can be shared
index = index.copy(deep=False)

return index


Expand Down
6 changes: 5 additions & 1 deletion pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,11 @@ def _get_new_axes(self) -> List[Index]:
def _get_comb_axis(self, i: int) -> Index:
data_axis = self.objs[0]._get_block_manager_axis(i)
return get_objs_combined_axis(
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort
self.objs,
axis=data_axis,
intersect=self.intersect,
sort=self.sort,
copy=self.copy,
)

def _get_concat_axis(self) -> Index:
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2750,3 +2750,17 @@ def test_concat_sparse():
)
result = pd.concat([a, a], axis=1)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("test_series", [True, False])
def test_concat_copy_index(test_series, axis):
    # GH 29879
    # With copy=True the axes of the concatenated result must be new
    # objects, never the very Index instances held by the inputs.
    if not test_series:
        frame = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
        result = concat([frame, frame], axis=axis, copy=True)
        # Both axes are checked: whichever one is not being concatenated
        # along is the one built from the inputs' combined axis.
        assert result.index is not frame.index
        assert result.columns is not frame.columns
        return

    values = Series([1, 2])
    result = concat([values, values], axis=axis, copy=True)
    # A Series only exposes an index, so that is the only axis to compare.
    assert result.index is not values.index