Skip to content

Commit 91e6dda

Browse files
committed
Merge pull request #8331 from jreback/concat_copy
PERF: add copy=True argument to pd.concat to enable pass-thru concats with complete blocks (GH8252)
2 parents 7c319fd + 7a1ffc7 commit 91e6dda

File tree

3 files changed

+43
-4
lines changed

3 files changed

+43
-4
lines changed

doc/source/v0.15.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ API changes
276276
Index(['a','b','c']).difference(Index(['b','c','d']))
277277

278278
- ``DataFrame.info()`` now ends its output with a newline character (:issue:`8114`)
279+
- add ``copy=True`` argument to ``pd.concat`` to enable pass-through of complete blocks (:issue:`8252`)
279280

280281
.. _whatsnew_0150.dt:
281282

pandas/tools/merge.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,7 @@ def _sort_labels(uniques, left, right):
666666

667667

668668
def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
669-
keys=None, levels=None, names=None, verify_integrity=False):
669+
keys=None, levels=None, names=None, verify_integrity=False, copy=True):
670670
"""
671671
Concatenate pandas objects along a particular axis with optional set logic
672672
along the other axes. Can also add a layer of hierarchical indexing on the
@@ -704,6 +704,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
704704
concatenating objects where the concatenation axis does not have
705705
meaningful indexing information. Note that the index values on the other
706706
axes are still respected in the join.
707+
copy : boolean, default True
708+
If False, do not copy data unnecessarily
707709
708710
Notes
709711
-----
@@ -716,7 +718,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
716718
op = _Concatenator(objs, axis=axis, join_axes=join_axes,
717719
ignore_index=ignore_index, join=join,
718720
keys=keys, levels=levels, names=names,
719-
verify_integrity=verify_integrity)
721+
verify_integrity=verify_integrity,
722+
copy=copy)
720723
return op.get_result()
721724

722725

@@ -727,7 +730,7 @@ class _Concatenator(object):
727730

728731
def __init__(self, objs, axis=0, join='outer', join_axes=None,
729732
keys=None, levels=None, names=None,
730-
ignore_index=False, verify_integrity=False):
733+
ignore_index=False, verify_integrity=False, copy=True):
731734
if not isinstance(objs, (list,tuple,types.GeneratorType,dict,TextFileReader)):
732735
raise TypeError('first argument must be a list-like of pandas '
733736
'objects, you passed an object of type '
@@ -846,6 +849,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
846849

847850
self.ignore_index = ignore_index
848851
self.verify_integrity = verify_integrity
852+
self.copy = copy
849853

850854
self.new_axes = self._get_new_axes()
851855

@@ -879,7 +883,9 @@ def get_result(self):
879883
mgrs_indexers.append((obj._data, indexers))
880884

881885
new_data = concatenate_block_managers(
882-
mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=True)
886+
mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy)
887+
if not self.copy:
888+
new_data._consolidate_inplace()
883889

884890
return self.objs[0]._from_axes(new_data, self.new_axes).__finalize__(self, method='concat')
885891

pandas/tools/tests/test_merge.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1485,6 +1485,38 @@ def test_append_missing_column_proper_upcast(self):
14851485
self.assertEqual(appended['A'].dtype, 'f8')
14861486
self.assertEqual(appended['B'].dtype, 'O')
14871487

1488+
def test_concat_copy(self):
1489+
1490+
df = DataFrame(np.random.randn(4, 3))
1491+
df2 = DataFrame(np.random.randint(0,10,size=4).reshape(4,1))
1492+
df3 = DataFrame({5 : 'foo'},index=range(4))
1493+
1494+
# these are actual copies
1495+
result = concat([df,df2,df3],axis=1,copy=True)
1496+
for b in result._data.blocks:
1497+
self.assertIsNone(b.values.base)
1498+
1499+
# these are the same
1500+
result = concat([df,df2,df3],axis=1,copy=False)
1501+
for b in result._data.blocks:
1502+
if b.is_float:
1503+
self.assertTrue(b.values.base is df._data.blocks[0].values.base)
1504+
elif b.is_integer:
1505+
self.assertTrue(b.values.base is df2._data.blocks[0].values.base)
1506+
elif b.is_object:
1507+
self.assertIsNotNone(b.values.base)
1508+
1509+
# float block was consolidated
1510+
df4 = DataFrame(np.random.randn(4,1))
1511+
result = concat([df,df2,df3,df4],axis=1,copy=False)
1512+
for b in result._data.blocks:
1513+
if b.is_float:
1514+
self.assertIsNone(b.values.base)
1515+
elif b.is_integer:
1516+
self.assertTrue(b.values.base is df2._data.blocks[0].values.base)
1517+
elif b.is_object:
1518+
self.assertIsNotNone(b.values.base)
1519+
14881520
def test_concat_with_group_keys(self):
14891521
df = DataFrame(np.random.randn(4, 3))
14901522
df2 = DataFrame(np.random.randn(4, 4))

0 commit comments

Comments
 (0)