Skip to content

Commit d4618d3

Browse files
committed
BUG: Fix/test SparseSeries/SparseDataFrame stack/unstack
1 parent d43aba8 commit d4618d3

File tree

3 files changed

+54
-13
lines changed

3 files changed

+54
-13
lines changed

doc/source/whatsnew/v0.21.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,7 @@ Sparse
588588

589589
- Bug in ``SparseSeries`` raising ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`)
590590
- Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`)
591-
591+
- Bug in :func:`SparseSeries.unstack` and :func:`SparseDataFrame.stack` (:issue:`16614`, :issue:`15045`)
592592

593593
Reshaping
594594
^^^^^^^^^

pandas/core/reshape/reshape.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas.core.dtypes.common import (
1111
_ensure_platform_int,
1212
is_list_like, is_bool_dtype,
13-
needs_i8_conversion)
13+
needs_i8_conversion, is_sparse)
1414
from pandas.core.dtypes.cast import maybe_promote
1515
from pandas.core.dtypes.missing import notna
1616
import pandas.core.dtypes.concat as _concat
@@ -75,10 +75,15 @@ def __init__(self, values, index, level=-1, value_columns=None,
7575
fill_value=None):
7676

7777
self.is_categorical = None
78+
self.is_sparse = is_sparse(values)
7879
if values.ndim == 1:
7980
if isinstance(values, Categorical):
8081
self.is_categorical = values
8182
values = np.array(values)
83+
elif self.is_sparse:
84+
# XXX: Makes SparseArray *dense*, but it's supposedly
85+
# a single column at a time, so it's "doable"
86+
values = values.values
8287
values = values[:, np.newaxis]
8388
self.values = values
8489
self.value_columns = value_columns
@@ -177,7 +182,8 @@ def get_result(self):
177182
ordered=ordered)
178183
for i in range(values.shape[-1])]
179184

180-
return DataFrame(values, index=index, columns=columns)
185+
klass = SparseDataFrame if self.is_sparse else DataFrame
186+
return klass(values, index=index, columns=columns)
181187

182188
def get_new_values(self):
183189
values = self.values
@@ -472,15 +478,15 @@ def _unstack_frame(obj, level, fill_value=None):
472478
from pandas.core.internals import BlockManager, make_block
473479

474480
if obj._is_mixed_type:
475-
unstacker = _Unstacker(np.empty(obj.shape, dtype=bool), # dummy
481+
unstacker = _Unstacker(np.empty((0, 0)), # dummy
476482
obj.index, level=level,
477483
value_columns=obj.columns)
478484
new_columns = unstacker.get_new_columns()
479485
new_index = unstacker.get_new_index()
480486
new_axes = [new_columns, new_index]
481487

482488
new_blocks = []
483-
mask_blocks = []
489+
mask_blocks = np.zeros_like(new_columns, dtype=bool)
484490
for blk in obj._data.blocks:
485491
blk_items = obj._data.items[blk.mgr_locs.indexer]
486492
bunstacker = _Unstacker(blk.values.T, obj.index, level=level,
@@ -490,15 +496,25 @@ def _unstack_frame(obj, level, fill_value=None):
490496
new_placement = new_columns.get_indexer(new_items)
491497
new_values, mask = bunstacker.get_new_values()
492498

493-
mblk = make_block(mask.T, placement=new_placement)
494-
mask_blocks.append(mblk)
499+
mask_blocks[new_placement] = mask.any(0)
495500

496-
newb = make_block(new_values.T, placement=new_placement)
497-
new_blocks.append(newb)
501+
# BlockManager can't handle SparseBlocks with multiple items,
502+
# so lets make one block for each item
503+
if is_sparse(blk.values):
504+
new_placement = [[i] for i in new_placement]
505+
new_values = new_values.T
506+
else:
507+
new_placement = [new_placement]
508+
new_values = [new_values.T]
509+
510+
for cols, placement in zip(new_values, new_placement):
511+
newb = blk.make_block_same_class(cols, placement=placement)
512+
new_blocks.append(newb)
498513

499-
result = DataFrame(BlockManager(new_blocks, new_axes))
500-
mask_frame = DataFrame(BlockManager(mask_blocks, new_axes))
501-
return result.loc[:, mask_frame.sum(0) > 0]
514+
klass = type(obj)
515+
assert klass in (SparseDataFrame, DataFrame), klass
516+
result = klass(BlockManager(new_blocks, new_axes))
517+
return result.loc[:, mask_blocks]
502518
else:
503519
unstacker = _Unstacker(obj.values, obj.index, level=level,
504520
value_columns=obj.columns,
@@ -559,7 +575,9 @@ def factorize(index):
559575
mask = notna(new_values)
560576
new_values = new_values[mask]
561577
new_index = new_index[mask]
562-
return Series(new_values, index=new_index)
578+
579+
klass = type(frame)._constructor_sliced
580+
return klass(new_values, index=new_index)
563581

564582

565583
def stack_multiple(frame, level, dropna=True):

pandas/tests/test_multilevel.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2388,6 +2388,29 @@ def test_iloc_mi(self):
23882388
tm.assert_frame_equal(result, expected)
23892389

23902390

2391+
class TestSparse(object):
2392+
def setup_method(self, method):
2393+
self.sdf = pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye
2394+
self.mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
2395+
2396+
def test_sparse_frame_stack(self):
2397+
ss = self.sdf.stack()
2398+
expected = pd.SparseSeries(np.ones(3), index=self.mi)
2399+
tm.assert_sp_series_equal(ss, expected)
2400+
2401+
def test_sparse_frame_unstack(self):
2402+
mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)])
2403+
sdf = self.sdf
2404+
sdf.index = mi
2405+
df = pd.DataFrame(np.eye(3), index=mi).replace(0, np.nan)
2406+
2407+
tm.assert_numpy_array_equal(df.unstack().values, sdf.unstack().values)
2408+
2409+
def test_sparse_series_unstack(self):
2410+
frame = pd.SparseSeries(np.ones(3), index=self.mi).unstack()
2411+
tm.assert_sp_frame_equal(frame, self.sdf)
2412+
2413+
23912414
class TestSorted(Base):
23922415
""" everything you wanted to test about sorting """
23932416

0 commit comments

Comments
 (0)