Closed
Description
In [1]: import pandas as pd
In [2]: pd.__version__
Out[2]: '0.23.4'
In [3]: a = pd.Series(pd.SparseArray([0, 1, 2]))
In [4]: pd.concat([a, a], axis=1)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-a3e65bc6fb67> in <module>()
----> 1 pd.concat([a, a], axis=1)
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)
224 verify_integrity=verify_integrity,
225 copy=copy, sort=sort)
--> 226 return op.get_result()
227
228
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/reshape/concat.py in get_result(self)
398
399 index, columns = self.new_axes
--> 400 df = cons(data, index=index)
401 df.columns = columns
402 return df.__finalize__(self, method='concat')
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
346 dtype=dtype, copy=copy)
347 elif isinstance(data, dict):
--> 348 mgr = self._init_dict(data, index, columns, dtype=dtype)
349 elif isinstance(data, ma.MaskedArray):
350 import numpy.ma.mrecords as mrecords
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
457 arrays = [data[k] for k in keys]
458
--> 459 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
460
461 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
7362 axes = [_ensure_index(columns), _ensure_index(index)]
7363
-> 7364 return create_block_manager_from_arrays(arrays, arr_names, axes)
7365
7366
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/internals.py in create_block_manager_from_arrays(arrays, names, axes)
4875 return mgr
4876 except ValueError as e:
-> 4877 construction_error(len(arrays), arrays[0].shape, axes, e)
4878
4879
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/internals.py in construction_error(tot_items, block_shape, axes, e)
4837 implied = tuple(map(int, [len(ax) for ax in axes]))
4838 if passed == implied and e is not None:
-> 4839 raise e
4840 if block_shape[0] == 0:
4841 raise ValueError("Empty data passed with indices specified.")
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/internals.py in create_block_manager_from_arrays(arrays, names, axes)
4870
4871 try:
-> 4872 blocks = form_blocks(arrays, names, axes)
4873 mgr = BlockManager(blocks, axes)
4874 mgr._consolidate_inplace()
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/internals.py in form_blocks(arrays, names, axes)
4916
4917 if len(items_dict['IntBlock']):
-> 4918 int_blocks = _multi_blockify(items_dict['IntBlock'])
4919 blocks.extend(int_blocks)
4920
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/internals.py in _multi_blockify(tuples, dtype)
4993 for dtype, tup_block in grouper:
4994
-> 4995 values, placement = _stack_arrays(list(tup_block), dtype)
4996
4997 block = make_block(values, placement=placement)
~/Envs/dask-dev/lib/python3.7/site-packages/pandas/core/internals.py in _stack_arrays(tuples, dtype)
5037 stacked = np.empty(shape, dtype=dtype)
5038 for i, arr in enumerate(arrays):
-> 5039 stacked[i] = _asarray_compat(arr)
5040
5041 return stacked, placement
ValueError: could not broadcast input array from shape (2) into shape (3)
Right now on master, we return a SparseDataFrame. I would like to instead return a DataFrame with sparse values. On master, the rule for the result type is currently "sparse if any of the inputs are sparse". I would amend that to specifically be "sparse if any of the inputs are a SparseDataFrame or SparseSeries". This will require breaking a couple of places, like SparseSeries.unstack(), unless we hack in some special cases for sparse, which I'd like to avoid.