72
72
73
73
74
74
def _concatenate_array_managers (
75
- mgrs_indexers , axes : list [Index ], concat_axis : AxisInt , copy : bool
75
+ mgrs : list [ Manager ] , axes : list [Index ], concat_axis : AxisInt
76
76
) -> Manager :
77
77
"""
78
78
Concatenate array managers into one.
@@ -82,27 +82,11 @@ def _concatenate_array_managers(
82
82
mgrs : list of ArrayManager
83
83
axes : list of Index
84
84
concat_axis : int
85
- copy : bool
86
85
87
86
Returns
88
87
-------
89
88
ArrayManager
90
89
"""
91
- # reindex all arrays
92
- mgrs = []
93
- for mgr , indexers in mgrs_indexers :
94
- axis1_made_copy = False
95
- for ax , indexer in indexers .items ():
96
- mgr = mgr .reindex_indexer (
97
- axes [ax ], indexer , axis = ax , allow_dups = True , use_na_proxy = True
98
- )
99
- if ax == 1 and indexer is not None :
100
- axis1_made_copy = True
101
- if copy and concat_axis == 0 and not axis1_made_copy :
102
- # for concat_axis 1 we will always get a copy through concat_arrays
103
- mgr = mgr .copy ()
104
- mgrs .append (mgr )
105
-
106
90
if concat_axis == 1 :
107
91
# concatting along the rows -> concat the reindexed arrays
108
92
# TODO(ArrayManager) doesn't yet preserve the correct dtype
@@ -192,9 +176,18 @@ def concatenate_managers(
192
176
-------
193
177
BlockManager
194
178
"""
179
+
180
+ needs_copy = copy and concat_axis == 0
181
+
195
182
# TODO(ArrayManager) this assumes that all managers are of the same type
196
183
if isinstance (mgrs_indexers [0 ][0 ], ArrayManager ):
197
- return _concatenate_array_managers (mgrs_indexers , axes , concat_axis , copy )
184
+ mgrs = _maybe_reindex_columns_na_proxy (axes , mgrs_indexers , needs_copy )
185
+ # error: Argument 1 to "_concatenate_array_managers" has incompatible
186
+ # type "List[BlockManager]"; expected "List[Union[ArrayManager,
187
+ # SingleArrayManager, BlockManager, SingleBlockManager]]"
188
+ return _concatenate_array_managers (
189
+ mgrs , axes , concat_axis # type: ignore[arg-type]
190
+ )
198
191
199
192
# Assertions disabled for performance
200
193
# for tup in mgrs_indexers:
@@ -203,7 +196,8 @@ def concatenate_managers(
203
196
# assert concat_axis not in indexers
204
197
205
198
if concat_axis == 0 :
206
- return _concat_managers_axis0 (mgrs_indexers , axes , copy )
199
+ mgrs = _maybe_reindex_columns_na_proxy (axes , mgrs_indexers , needs_copy )
200
+ return _concat_managers_axis0 (mgrs , axes )
207
201
208
202
if len (mgrs_indexers ) > 0 and mgrs_indexers [0 ][0 ].nblocks > 0 :
209
203
first_dtype = mgrs_indexers [0 ][0 ].blocks [0 ].dtype
@@ -220,19 +214,15 @@ def concatenate_managers(
220
214
nb = _concat_homogeneous_fastpath (mgrs_indexers , shape , first_dtype )
221
215
return BlockManager ((nb ,), axes )
222
216
223
- mgrs_indexers = _maybe_reindex_columns_na_proxy (axes , mgrs_indexers )
224
- if len (mgrs_indexers ) == 1 :
225
- mgr , indexers = mgrs_indexers [0 ]
226
- # Assertion correct but disabled for perf:
227
- # assert not indexers
228
- if copy :
229
- out = mgr .copy (deep = True )
230
- else :
231
- out = mgr .copy (deep = False )
217
+ mgrs = _maybe_reindex_columns_na_proxy (axes , mgrs_indexers , needs_copy )
218
+
219
+ if len (mgrs ) == 1 :
220
+ mgr = mgrs [0 ]
221
+ out = mgr .copy (deep = False )
232
222
out .axes = axes
233
223
return out
234
224
235
- concat_plan = _get_combined_plan ([ mgr for mgr , _ in mgrs_indexers ] )
225
+ concat_plan = _get_combined_plan (mgrs )
236
226
237
227
blocks = []
238
228
values : ArrayLike
@@ -277,35 +267,20 @@ def concatenate_managers(
277
267
return BlockManager (tuple (blocks ), axes )
278
268
279
269
280
- def _concat_managers_axis0 (
281
- mgrs_indexers , axes : list [Index ], copy : bool
282
- ) -> BlockManager :
270
+ def _concat_managers_axis0 (mgrs : list [BlockManager ], axes : list [Index ]) -> BlockManager :
283
271
"""
284
272
concat_managers specialized to concat_axis=0, with reindexing already
285
273
having been done in _maybe_reindex_columns_na_proxy.
286
274
"""
287
- had_reindexers = {
288
- i : len (mgrs_indexers [i ][1 ]) > 0 for i in range (len (mgrs_indexers ))
289
- }
290
- mgrs_indexers = _maybe_reindex_columns_na_proxy (axes , mgrs_indexers )
291
-
292
- mgrs : list [BlockManager ] = [x [0 ] for x in mgrs_indexers ]
293
275
294
276
offset = 0
295
277
blocks : list [Block ] = []
296
278
for i , mgr in enumerate (mgrs ):
297
- # If we already reindexed, then we definitely don't need another copy
298
- made_copy = had_reindexers [i ]
299
-
300
279
for blk in mgr .blocks :
301
- if made_copy :
302
- nb = blk .copy (deep = False )
303
- elif copy :
304
- nb = blk .copy ()
305
- else :
306
- # by slicing instead of copy(deep=False), we get a new array
307
- # object, see test_concat_copy
308
- nb = blk .getitem_block (slice (None ))
280
+ # We need to do getitem_block here otherwise we would be altering
281
+ # blk.mgr_locs in place, which would render it invalid. This is only
282
+ # relevant in the copy=False case.
283
+ nb = blk .getitem_block (slice (None ))
309
284
nb ._mgr_locs = nb ._mgr_locs .add (offset )
310
285
blocks .append (nb )
311
286
@@ -316,16 +291,18 @@ def _concat_managers_axis0(
316
291
317
292
318
293
def _maybe_reindex_columns_na_proxy (
319
- axes : list [Index ], mgrs_indexers : list [tuple [BlockManager , dict [int , np .ndarray ]]]
320
- ) -> list [tuple [BlockManager , dict [int , np .ndarray ]]]:
294
+ axes : list [Index ],
295
+ mgrs_indexers : list [tuple [BlockManager , dict [int , np .ndarray ]]],
296
+ needs_copy : bool ,
297
+ ) -> list [BlockManager ]:
321
298
"""
322
299
Reindex along columns so that all of the BlockManagers being concatenated
323
300
have matching columns.
324
301
325
302
Columns added in this reindexing have dtype=np.void, indicating they
326
303
should be ignored when choosing a column's final dtype.
327
304
"""
328
- new_mgrs_indexers : list [ tuple [ BlockManager , dict [ int , np . ndarray ]]] = []
305
+ new_mgrs = []
329
306
330
307
for mgr , indexers in mgrs_indexers :
331
308
# For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this
@@ -340,8 +317,11 @@ def _maybe_reindex_columns_na_proxy(
340
317
allow_dups = True ,
341
318
use_na_proxy = True , # only relevant for i==0
342
319
)
343
- new_mgrs_indexers .append ((mgr , {}))
344
- return new_mgrs_indexers
320
+ if needs_copy and not indexers :
321
+ mgr = mgr .copy ()
322
+
323
+ new_mgrs .append (mgr )
324
+ return new_mgrs
345
325
346
326
347
327
def _is_homogeneous_mgr (mgr : BlockManager , first_dtype : DtypeObj ) -> bool :
0 commit comments