Skip to content

PERF: avoid copy in concatenate_array_managers if reindex already copies #44559

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
4 changes: 2 additions & 2 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,8 @@ def copy_func(ax):
if deep:
new_arrays = [arr.copy() for arr in self.arrays]
else:
- new_arrays = self.arrays
- return type(self)(new_arrays, new_axes)
+ new_arrays = list(self.arrays)
+ return type(self)(new_arrays, new_axes, verify_integrity=False)

def reindex_indexer(
self: T,
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,16 @@ def _concatenate_array_managers(
# reindex all arrays
mgrs = []
for mgr, indexers in mgrs_indexers:
+ axis1_made_copy = False
for ax, indexer in indexers.items():
mgr = mgr.reindex_indexer(
axes[ax], indexer, axis=ax, allow_dups=True, use_na_proxy=True
)
+ if ax == 1 and indexer is not None:
+ axis1_made_copy = True
+ if copy and concat_axis == 0 and not axis1_made_copy:
+ # for concat_axis 1 we will always get a copy through concat_arrays
+ mgr = mgr.copy()
mgrs.append(mgr)

if concat_axis == 1:
Expand All @@ -94,8 +100,6 @@ def _concatenate_array_managers(
# concatting along the columns -> combine reindexed arrays in a single manager
assert concat_axis == 0
arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))
- if copy:
- arrays = [x.copy() for x in arrays]

new_mgr = ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False)
return new_mgr
Expand Down