Description
Reproducing the error:
import numpy as np, pandas as pd
# providing CategoricalDtype via e.g. pd.cut will also produce the error
s = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype=pd.CategoricalDtype(categories=[pd.Interval(0,1)]))
s.iloc[:3] = [np.nan, pd.Interval(0,1), np.nan]
# ValueError: Reindexing only valid with uniquely valued Index objects
# these work:
s.iloc[:3] = [np.nan, np.nan, np.nan]
s.iloc[:3] = [np.nan, pd.Interval(0,1), pd.Interval(0,1)]
# as does:
s = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype=pd.CategoricalDtype(categories=['not-an-interval', pd.Interval(0,1)]))
s.iloc[:3] = [np.nan, pd.Interval(0,1), np.nan]
# as does:
d = pd.DataFrame({'s': pd.Series([np.nan]*4, dtype=pd.cut(range(3), bins=2).dtype), 'a': range(4)})
d['s'] = [np.nan, pd.Interval(1.0, 2.0), np.nan, np.nan]
d.loc[slice(0,2,1), 's'] = [pd.Interval(1.0, 2.0), np.nan, np.nan]
# but NOT:
d = pd.DataFrame({'s': pd.Series([np.nan]*4, dtype=pd.cut(range(3), bins=2).dtype), 'a': range(4)})
d.loc[slice(0,2,1), 's'] = [pd.Interval(1.0, 2.0), np.nan, np.nan]
The full traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-30-ca2aabf517a0> in <module>
1 s = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype=pd.CategoricalDtype(categories=[pd.Interval(0,1)]))
----> 2 s.iloc[:3] = [np.nan, pd.Interval(0,1), np.nan]
c:\python36\lib\site-packages\pandas\core\indexing.py in __setitem__(self, key, value)
200 key = com.apply_if_callable(key, self.obj)
201 indexer = self._get_setitem_indexer(key)
--> 202 self._setitem_with_indexer(indexer, value)
203
204 def _validate_key(self, key, axis: int):
c:\python36\lib\site-packages\pandas\core\indexing.py in _setitem_with_indexer(self, indexer, value)
577 # actually do the set
578 self.obj._consolidate_inplace()
--> 579 self.obj._data = self.obj._data.setitem(indexer=indexer, value=value)
580 self.obj._maybe_update_cacher(clear=True)
581
c:\python36\lib\site-packages\pandas\core\internals\managers.py in setitem(self, **kwargs)
558
559 def setitem(self, **kwargs):
--> 560 return self.apply("setitem", **kwargs)
561
562 def putmask(self, **kwargs):
c:\python36\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
c:\python36\lib\site-packages\pandas\core\internals\blocks.py in setitem(self, indexer, value)
1834
1835 check_setitem_lengths(indexer, value, self.values)
-> 1836 self.values[indexer] = value
1837 return self
1838
c:\python36\lib\site-packages\pandas\core\arrays\categorical.py in __setitem__(self, key, value)
2182 from pandas import Index
2183
-> 2184 to_add = Index(rvalue).difference(self.categories)
2185
2186 # no assignments of values not in categories, but it's always ok to set
c:\python36\lib\site-packages\pandas\core\indexes\interval.py in func(intvidx_self, other, sort)
163 raise TypeError(msg.format(op=self.op_name))
164
--> 165 return setop(intvidx_self, other, sort)
166
167 return func
c:\python36\lib\site-packages\pandas\core\indexes\interval.py in func(self, other, sort)
1267 @SetopCheck(op_name=op_name)
1268 def func(self, other, sort=sort):
-> 1269 result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort)
1270 result_name = get_op_result_name(self, other)
1271
c:\python36\lib\site-packages\pandas\core\indexes\multi.py in difference(self, other, sort)
3299 this = self._get_unique_index()
3300
-> 3301 indexer = this.get_indexer(other)
3302 indexer = indexer.take((indexer != -1).nonzero()[0])
3303
c:\python36\lib\site-packages\pandas\core\indexes\multi.py in get_indexer(self, target, method, limit, tolerance)
2423 if not self.is_unique:
2424 raise ValueError(
-> 2425 "Reindexing only valid with uniquely valued " "Index objects"
2426 )
2427
ValueError: Reindexing only valid with uniquely valued Index objects
Problem description
The problem is quite specific: the assignment must be to a slice, the categories of the Categorical Series/DataFrame column must all be Intervals, and the right-hand side must contain at least two np.nan (or None) values but must not consist entirely of np.nan; otherwise the assignment works.
My guess is that this is related to how the categories are stored: when a Series or DataFrame column has a CategoricalDtype whose categories are all Intervals, inspecting the object shows that the categories are stored as an IntervalIndex:
In [1]: s.dtype.categories
Out[1]:
IntervalIndex([(0, 1]],
closed='right',
dtype='interval[int64]')
whereas a categorical object with non-Interval categories mixed in stores them as a plain Index, and, in the DataFrame case, setting values on the column changes the column's dtype to dtype('O').
Expected Output
import numpy as np, pandas as pd
s = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype=pd.CategoricalDtype(categories=[pd.Interval(0,1)]))
s.iloc[:3] = [np.nan, pd.Interval(0,1), np.nan]
print(s)
#0 NaN
#1 (0, 1]
#2 NaN
#3 NaN
#Length: 4, dtype: category
#Categories (1, interval[int64]): [(0, 1]]
Output of pd.show_versions()
INSTALLED VERSIONS
commit : None
python : 3.6.1.final.0
python-bits : 64
OS : Windows
OS-release : 10
machine : AMD64
processor : Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.None
pandas : 0.25.0
numpy : 1.17.0
pytz : 2018.3
dateutil : 2.8.0
pip : 19.2.1
setuptools : 41.0.1
Cython : 0.25.2
pytest : 3.7.2
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : 0.9999999
pymysql : 0.7.11.None
psycopg2 : None
jinja2 : 2.10.1
IPython : 7.5.0
pandas_datareader: None
bs4 : None
bottleneck : 1.2.1
fastparquet : None
gcsfs : None
lxml.etree : None
matplotlib : 3.0.2
numexpr : 2.7.0
odfpy : None
openpyxl : 2.4.8
pandas_gbq : None
pyarrow : None
pytables : None
s3fs : None
scipy : 1.1.0
sqlalchemy : None
tables : 3.5.2
xarray : None
xlrd : 1.0.0
xlwt : None
xlsxwriter : None