Skip to content

Commit f621ea6

Browse files
committed
Move .unstack() logic onto BlockManager and Block
1 parent 9ae1c10 commit f621ea6

File tree

3 files changed, with 38 additions and 40 deletions.

pandas/core/categorical.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ def maybe_to_categorical(array):
128128
""" coerce to a categorical if a series is given """
129129
if isinstance(array, (ABCSeries, ABCCategoricalIndex)):
130130
return array._values
131+
elif isinstance(array, np.ndarray):
132+
return Categorical(array)
131133
return array
132134

133135

pandas/core/internals.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,10 @@ def equals(self, other):
14841484
return False
14851485
return array_equivalent(self.values, other.values)
14861486

1487+
def _unstack(self, new_values, new_placement):
1488+
"""Return a list of unstacked blocks of self"""
1489+
return [make_block(new_values, placement=new_placement)]
1490+
14871491
def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
14881492
"""
14891493
compute the quantiles of the
@@ -1712,6 +1716,12 @@ def _slice(self, slicer):
17121716
def _try_cast_result(self, result, dtype=None):
17131717
return result
17141718

1719+
def _unstack(self, new_values, new_placement):
1720+
# NonConsolidatable blocks can have a single item only, so we return
1721+
# one block per item
1722+
return [self.make_block_same_class(vals, [place])
1723+
for vals, place in zip(new_values, new_placement)]
1724+
17151725

17161726
class NumericBlock(Block):
17171727
__slots__ = ()
@@ -4167,6 +4177,27 @@ def canonicalize(block):
41674177
return all(block.equals(oblock)
41684178
for block, oblock in zip(self_blocks, other_blocks))
41694179

4180+
def unstack(self, unstacker):
4181+
"""Return blockmanager with all blocks unstacked"""
4182+
dummy = unstacker(np.empty((0, 0)), value_columns=self.items)
4183+
new_columns = dummy.get_new_columns()
4184+
new_index = dummy.get_new_index()
4185+
new_blocks = []
4186+
mask_columns = np.zeros_like(new_columns, dtype=bool)
4187+
4188+
for blk in self.blocks:
4189+
bunstacker = unstacker(
4190+
blk.values.T, value_columns=self.items[blk.mgr_locs.indexer])
4191+
new_items = bunstacker.get_new_columns()
4192+
new_values, mask = bunstacker.get_new_values()
4193+
new_placement = new_columns.get_indexer(new_items)
4194+
mask_columns[new_placement] = mask.any(0)
4195+
new_blocks.extend(blk._unstack(new_values.T, new_placement))
4196+
4197+
bm = BlockManager(new_blocks, [new_columns, new_index])
4198+
bm = bm.take(mask_columns.nonzero()[0], axis=0)
4199+
return bm
4200+
41704201

41714202
class SingleBlockManager(BlockManager):
41724203
""" manage a single block with """

pandas/core/reshape/reshape.py

Lines changed: 5 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint: disable=W0703,W0622,W0613,W0201
33
from pandas.compat import range, text_type, zip
44
from pandas import compat
5+
from functools import partial
56
import itertools
67
import re
78

@@ -475,48 +476,12 @@ def unstack(obj, level, fill_value=None):
475476

476477

477478
def _unstack_frame(obj, level, fill_value=None):
478-
from pandas.core.internals import BlockManager, make_block as _make_block
479-
480479
if obj._is_mixed_type:
481-
unstacker = _Unstacker(np.empty((0, 0)), # dummy
482-
obj.index, level=level,
483-
value_columns=obj.columns)
484-
new_columns = unstacker.get_new_columns()
485-
new_index = unstacker.get_new_index()
486-
new_axes = [new_columns, new_index]
487-
488-
new_blocks = []
489-
mask_blocks = np.zeros_like(new_columns, dtype=bool)
490-
for blk in obj._data.blocks:
491-
blk_items = obj._data.items[blk.mgr_locs.indexer]
492-
bunstacker = _Unstacker(blk.values.T, obj.index, level=level,
493-
value_columns=blk_items,
494-
fill_value=fill_value)
495-
new_items = bunstacker.get_new_columns()
496-
new_placement = new_columns.get_indexer(new_items)
497-
new_values, mask = bunstacker.get_new_values()
498-
499-
mask_blocks[new_placement] = mask.any(0)
500-
501-
# BlockManager can't handle SparseBlocks with multiple items,
502-
# so lets make one block for each item
503-
if is_sparse(blk.values):
504-
new_placement = [[i] for i in new_placement]
505-
new_values = new_values.T
506-
make_block = blk.make_block_same_class
507-
else:
508-
new_placement = [new_placement]
509-
new_values = [new_values.T]
510-
make_block = _make_block
511-
512-
for cols, placement in zip(new_values, new_placement):
513-
newb = make_block(cols, placement=placement)
514-
new_blocks.append(newb)
515-
480+
unstacker = partial(_Unstacker, index=obj.index,
481+
level=level, fill_value=fill_value)
482+
blocks = obj._data.unstack(unstacker)
516483
klass = type(obj)
517-
assert klass in (SparseDataFrame, DataFrame), klass
518-
result = klass(BlockManager(new_blocks, new_axes))
519-
return result.loc[:, mask_blocks]
484+
return klass(blocks)
520485
else:
521486
unstacker = _Unstacker(obj.values, obj.index, level=level,
522487
value_columns=obj.columns,

0 commit comments

Comments
 (0)