From 071a5c8ff314d5275def6b96bbb9ab76798dfa24 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 28 Mar 2020 15:15:19 -0700 Subject: [PATCH 1/2] REF: get placement arg out of Block.concat_same_type --- pandas/core/internals/blocks.py | 36 +++++++++---------- pandas/core/internals/managers.py | 5 ++- pandas/tests/extension/test_external_block.py | 5 ++- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b2a8c7a0864b8..c93568088862e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -326,16 +326,15 @@ def dtype(self): def merge(self, other): return _merge_blocks([self, other]) - def concat_same_type(self, to_concat, placement=None): + def concat_same_type(self, to_concat): """ Concatenate list of single blocks of the same type. """ values = self._concatenator( [blk.values for blk in to_concat], axis=self.ndim - 1 ) - return self.make_block_same_class( - values, placement=placement or slice(0, len(values), 1) - ) + placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) + return self.make_block_same_class(values, placement=placement) def iget(self, i): return self.values[i] @@ -1818,13 +1817,13 @@ def _slice(self, slicer): return self.values[slicer] - def concat_same_type(self, to_concat, placement=None): + def concat_same_type(self, to_concat): """ Concatenate list of single blocks of the same type. """ values = self._holder._concat_same_type([blk.values for blk in to_concat]) - placement = placement or slice(0, len(values), 1) - return self.make_block_same_class(values, ndim=self.ndim, placement=placement) + placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) + return self.make_block_same_class(values, placement=placement) def fillna(self, value, limit=None, inplace=False, downcast=None): values = self.values if inplace else self.values.copy() @@ -2336,19 +2335,19 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]: new_values = new_values.astype("timedelta64[ns]") return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)] - def concat_same_type(self, to_concat, placement=None): + def concat_same_type(self, to_concat): # need to handle concat([tz1, tz2]) here, since DatetimeArray # only handles cases where all the tzs are the same. # Instead of placing the condition here, it could also go into the # is_uniform_join_units check, but I'm not sure what is better. if len({x.dtype for x in to_concat}) > 1: values = concat_datetime([x.values for x in to_concat]) - placement = placement or slice(0, len(values), 1) - if self.ndim > 1: - values = np.atleast_2d(values) - return ObjectBlock(values, ndim=self.ndim, placement=placement) - return super().concat_same_type(to_concat, placement) + values = values.astype(object, copy=False) + placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) + + return self.make_block(_block_shape(values, self.ndim), placement=placement) + return super().concat_same_type(to_concat) def fillna(self, value, limit=None, inplace=False, downcast=None): # We support filling a DatetimeTZ with a `value` whose timezone @@ -2802,7 +2801,7 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return Categorical - def concat_same_type(self, to_concat, placement=None): + def concat_same_type(self, to_concat): """ Concatenate list of single blocks of the same type. @@ -2815,12 +2814,11 @@ def concat_same_type(self, to_concat, placement=None): 1. Change Categorical._concat_same_type to use union_categoricals 2. Delete this method. """ - values = self._concatenator( - [blk.values for blk in to_concat], axis=self.ndim - 1 - ) + values = self._holder._concat_same_type([blk.values for blk in to_concat]) + placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) # not using self.make_block_same_class as values can be object dtype - return make_block( - values, placement=placement or slice(0, len(values), 1), ndim=self.ndim + return self.make_block( + _block_shape(values, ndim=self.ndim), placement=placement ) def replace( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index dda932cafe73b..9c1cdfb89b762 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2023,9 +2023,8 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): values = values.view() b = b.make_block_same_class(values, placement=placement) elif is_uniform_join_units(join_units): - b = join_units[0].block.concat_same_type( - [ju.block for ju in join_units], placement=placement - ) + b = join_units[0].block.concat_same_type([ju.block for ju in join_units]) + b.mgr_locs = placement else: b = make_block( concatenate_join_units(join_units, concat_axis, copy=copy), diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 26606d7e799e8..0ee9e71cd81a6 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -17,9 +17,8 @@ def concat_same_type(self, to_concat, placement=None): always 1D in this custom Block """ values = np.concatenate([blk.values for blk in to_concat]) - return self.make_block_same_class( - values, placement=placement or slice(0, len(values), 1) - ) + placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) + return self.make_block_same_class(values, placement=placement) @pytest.fixture From 86960ad91b438d6bcf01c57c3fdf3118d23c5e52 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 30 Mar 2020 08:52:04 -0700 Subject: [PATCH 2/2] go back to _concatenator --- pandas/core/internals/blocks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c93568088862e..596e99440d57e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2814,7 +2814,9 @@ def concat_same_type(self, to_concat): 1. Change Categorical._concat_same_type to use union_categoricals 2. Delete this method. """ - values = self._holder._concat_same_type([blk.values for blk in to_concat]) + values = self._concatenator( + [blk.values for blk in to_concat], axis=self.ndim - 1 + ) placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) # not using self.make_block_same_class as values can be object dtype return self.make_block(