Closed
Description
edit: behavior with DataFrame[sparse]
In [68]: a=np.zeros(shape=(5,5))
In [69]: pd.DataFrame(a).apply(lambda x: pd.SparseArray(x, fill_value=0)).drop_duplicates()
Out[69]:
0 1 2 3 4
0 NaN NaN NaN NaN NaN
It seems that `pd.SparseDataFrame.duplicated` (and therefore `drop_duplicates`, which calls it) consistently fails when the SparseDataFrame's data contains the value given by `default_fill_value`.
When the SparseDataFrame does not contain the default_fill_value, everything works fine.
To reproduce, default_fill_value can be anything: either the default NaN or a manually specified value (I chose 0).
Reproduction:
```
In [2]: import pandas as pd
In [3]: import numpy as np
In [4]: a=np.zeros(shape=(5,5))
In [5]: df=pd.SparseDataFrame(a)
In [6]: df.drop_duplicates() # <- This is good
Out[6]:
0 1 2 3 4
0 0 0 0 0 0
In [8]: df=pd.SparseDataFrame(a, default_fill_value=0)
In [9]: df.drop_duplicates() # <- This should work similarly to [6] but fails instead
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-9-daf275b6788b> in <module>()
----> 1 df.drop_duplicates()
/usr/local/lib/python2.7/dist-packages/pandas/util/decorators.pyc in wrapper(*args, **kwargs)
87 else:
88 kwargs[new_arg_name] = new_arg_value
---> 89 return func(*args, **kwargs)
90 return wrapper
91 return _deprecate_kwarg
/usr/local/lib/python2.7/dist-packages/pandas/util/decorators.pyc in wrapper(*args, **kwargs)
87 else:
88 kwargs[new_arg_name] = new_arg_value
---> 89 return func(*args, **kwargs)
90 return wrapper
91 return _deprecate_kwarg
/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in drop_duplicates(self, subset, keep, inplace)
3003 deduplicated : DataFrame
3004 """
-> 3005 duplicated = self.duplicated(subset, keep=keep)
3006
3007 if inplace:
/usr/local/lib/python2.7/dist-packages/pandas/util/decorators.pyc in wrapper(*args, **kwargs)
87 else:
88 kwargs[new_arg_name] = new_arg_value
---> 89 return func(*args, **kwargs)
90 return wrapper
91 return _deprecate_kwarg
/usr/local/lib/python2.7/dist-packages/pandas/util/decorators.pyc in wrapper(*args, **kwargs)
87 else:
88 kwargs[new_arg_name] = new_arg_value
---> 89 return func(*args, **kwargs)
90 return wrapper
91 return _deprecate_kwarg
/usr/local/lib/python2.7/dist-packages/pandas/core/frame.pyc in duplicated(self, subset, keep)
3056
3057 ids = get_group_index(labels, shape, sort=False, xnull=False)
-> 3058 return Series(duplicated_int64(ids, keep), index=self.index)
3059
3060 #----------------------------------------------------------------------
/usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in __init__(self, data, index, dtype, name, copy, fastpath)
225 raise_cast_failure=True)
226
--> 227 data = SingleBlockManager(data, index, fastpath=True)
228
229 generic.NDFrame.__init__(self, data, fastpath=True)
/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in __init__(self, block, axis, do_integrity_check, fastpath)
3734 block = make_block(block,
3735 placement=slice(0, len(axis)),
-> 3736 ndim=1, fastpath=True)
3737
3738 self.blocks = [block]
/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in make_block(values, placement, klass, ndim, dtype, fastpath)
2452
2453 return klass(values, ndim=ndim, fastpath=fastpath,
-> 2454 placement=placement)
2455
2456
/usr/local/lib/python2.7/dist-packages/pandas/core/internals.pyc in __init__(self, values, placement, ndim, fastpath)
85 raise ValueError('Wrong number of items passed %d,'
86 ' placement implies %d' % (
---> 87 len(self.values), len(self.mgr_locs)))
88
89 @property
ValueError: Wrong number of items passed 0, placement implies 5
```
show_versions():
```
In [2]: pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.9.final.0
python-bits: 64
OS: Linux
OS-release: 3.16.0-4-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.17.1
nose: 1.3.7
pip: 7.1.2
setuptools: 18.5
Cython: 0.23.4
numpy: 1.10.1
scipy: 0.16.1
statsmodels: None
IPython: 4.0.0
sphinx: 1.3.1
patsy: None
dateutil: 2.4.2
pytz: 2015.7
blosc: None
bottleneck: None
tables: None
numexpr: 2.4.6
matplotlib: 1.5.0
openpyxl: None
xlrd: None
xlwt: 1.0.0
xlsxwriter: None
lxml: None
bs4: None
html5lib: 0.9999999
httplib2: None
apiclient: None
sqlalchemy: 1.0.9
pymysql: None
psycopg2: 2.6.1 (dt dec pq3 ext lo64)
Jinja2: None
```