Description
Code Sample, a copy-pastable example if possible
In [2]: pd.Series([1, 2, 3, 1], dtype='category').reindex([1,2,3,4,5], fill_value=-1)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-2-dd2f847ae90c> in <module>()
----> 1 pd.Series([1, 2, 3, 1], dtype='category').reindex([1,2,3,4,5], fill_value=-1)
~/nobackup/repo/pandas/pandas/core/series.py in reindex(self, index, **kwargs)
2638 @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
2639 def reindex(self, index=None, **kwargs):
-> 2640 return super(Series, self).reindex(index=index, **kwargs)
2641
2642 @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)
~/nobackup/repo/pandas/pandas/core/generic.py in reindex(self, *args, **kwargs)
3005 # perform the reindex on the axes
3006 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 3007 fill_value, copy).__finalize__(self)
3008
3009 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
~/nobackup/repo/pandas/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3023 obj = obj._reindex_with_indexers({axis: [new_index, indexer]},
3024 fill_value=fill_value,
-> 3025 copy=copy, allow_dups=False)
3026
3027 return obj
~/nobackup/repo/pandas/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
3126 fill_value=fill_value,
3127 allow_dups=allow_dups,
-> 3128 copy=copy)
3129
3130 if copy and new_data is self._data:
~/nobackup/repo/pandas/pandas/core/internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
4139 if axis == 0:
4140 new_blocks = self._slice_take_blocks_ax0(indexer,
-> 4141 fill_tuple=(fill_value,))
4142 else:
4143 new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
~/nobackup/repo/pandas/pandas/core/internals.py in _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple)
4178 return [blk.take_nd(slobj, axis=0,
4179 new_mgr_locs=slice(0, sllen),
-> 4180 fill_tuple=fill_tuple)]
4181
4182 if sl_type in ('slice', 'mask'):
~/nobackup/repo/pandas/pandas/core/internals.py in take_nd(self, indexer, axis, new_mgr_locs, fill_tuple)
2402 # but are passed the axis depending on the calling routing
2403 # if its REALLY axis 0, then this will be a reindex and not a take
-> 2404 new_values = self.values.take_nd(indexer, fill_value=fill_value)
2405
2406 # if we are a 1-dim object, then always place at 0
~/nobackup/repo/pandas/pandas/core/categorical.py in take_nd(self, indexer, allow_fill, fill_value)
1712 # filling must always be None/nan here
1713 # but is passed thru internally
-> 1714 assert isna(fill_value)
1715
1716 codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
AssertionError:
Problem description
At the very least, the error could be improved: nothing in the docs suggests that `fill_value` must be a missing value, so the fact that this actually works with `fill_value=np.nan` is an implementation detail. But maybe we could even support this properly? The effort of adding one element to the categories and setting the new elements to it should be minimal.
Expected Output
Either the new elements are set to -1, or at least a more meaningful error message is raised.
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.9.0-3-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.UTF-8
LOCALE: it_IT.UTF-8
pandas: 0.22.0.dev0+84.g8dac63314
pytest: 3.0.6
pip: 9.0.1
setuptools: 36.6.0
Cython: 0.25.2
numpy: 1.12.1
scipy: 0.19.0
pyarrow: None
xarray: None
IPython: 6.2.1
sphinx: 1.5.6
patsy: 0.4.1
dateutil: 2.6.1
pytz: 2017.2
blosc: None
bottleneck: 1.2.0dev
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.0.0
openpyxl: None
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.6
lxml: None
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: 0.2.1