Description
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
(optional) I have confirmed this bug exists on the master branch of pandas.
Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.
Code Sample, a copy-pastable example
import pandas as pd
ser = pd.Series(range(0,100))
ser1 = pd.cut(ser, 10).value_counts().head(5)
ser2 = pd.cut(ser, 10).value_counts().tail(5)
pd.DataFrame({'1': ser1, '2': ser2})
Problem description
As of pandas 1.3.0, this raises the following error. Previously, it ran and produced the expected output (see below).
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-ad7564e1a27b> in <module>
3 ser1 = pd.cut(ser, 10).value_counts().head(5)
4 ser2 = pd.cut(ser, 10).value_counts().tail(5)
----> 5 pd.DataFrame({'1': ser1, '2': ser2})
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
612 elif isinstance(data, dict):
613 # GH#38939 de facto copy defaults to False only in non-dict cases
--> 614 mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)
615 elif isinstance(data, ma.MaskedArray):
616 import numpy.ma.mrecords as mrecords
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/internals/construction.py in dict_to_mgr(data, index, columns, dtype, typ, copy)
460 # TODO: can we get rid of the dt64tz special case above?
461
--> 462 return arrays_to_mgr(
463 arrays, data_names, index, columns, dtype=dtype, typ=typ, consolidate=copy
464 )
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity, typ, consolidate)
120
121 # don't force copy because getting jammed in an ndarray anyway
--> 122 arrays = _homogenize(arrays, index, dtype)
123
124 else:
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/internals/construction.py in _homogenize(data, index, dtype)
566 # Forces alignment. No need to copy data since we
567 # are putting it into an ndarray later
--> 568 val = val.reindex(index, copy=False)
569
570 val = val._values
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/series.py in reindex(self, index, **kwargs)
4577 )
4578 def reindex(self, index=None, **kwargs):
-> 4579 return super().reindex(index=index, **kwargs)
4580
4581 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4807
4808 # perform the reindex on the axes
-> 4809 return self._reindex_axes(
4810 axes, level, limit, tolerance, method, fill_value, copy
4811 ).__finalize__(self, method="reindex")
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4823
4824 ax = self._get_axis(a)
-> 4825 new_index, indexer = ax.reindex(
4826 labels, level=level, limit=limit, tolerance=tolerance, method=method
4827 )
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/indexes/category.py in reindex(self, target, method, level, limit, tolerance)
422 # coerce to a regular index here!
423 result = Index(np.array(self), name=self.name)
--> 424 new_target, indexer, _ = result._reindex_non_unique(target)
425 else:
426
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/indexes/base.py in _reindex_non_unique(self, target)
3865 return self[:0], np.array([], dtype=np.intp), None
3866
-> 3867 indexer, missing = self.get_indexer_non_unique(target)
3868 check = indexer != -1
3869 new_labels = self.take(indexer[check])
~/workplace/ri/rime/python/rime/.venv/lib/python3.8/site-packages/pandas/core/indexes/interval.py in get_indexer_non_unique(self, target)
692 # because IntervalIndex does partial-int indexing
693 target = self._maybe_convert_i8(target)
--> 694 indexer, missing = self._engine.get_indexer_non_unique(target.values)
695
696 return ensure_platform_int(indexer), ensure_platform_int(missing)
pandas/_libs/intervaltree.pxi in pandas._libs.interval.IntervalTree.__pyx_fused_cpdef()
TypeError: No matching signature found
Expected Output
1 2
(-0.099, 9.9] 10.0 NaN
(9.9, 19.8] 10.0 NaN
(19.8, 29.7] 10.0 NaN
(29.7, 39.6] 10.0 NaN
(39.6, 49.5] 10.0 NaN
(49.5, 59.4] NaN 10.0
(59.4, 69.3] NaN 10.0
(69.3, 79.2] NaN 10.0
(79.2, 89.1] NaN 10.0
(89.1, 99.0] NaN 10.0
This expected output is preferable, in my opinion, because it is consistent with how the same operation works for other types of categorical indices. For example:
ser1 = pd.Series([0,1,2,], index=pd.CategoricalIndex(['a', 'b', 'c'], categories = ['a', 'b', 'c', 'd', 'e']))
ser2 = pd.Series([2,4], index=pd.CategoricalIndex(['d', 'e'], categories = ['a', 'b', 'c', 'd', 'e']))
pd.DataFrame({'1': ser1, '2': ser2})
does NOT raise an error. Therefore, it does not seem that the desired behavior is to raise on non-overlapping categorical indices.
Output of pd.show_versions()
INSTALLED VERSIONS
commit : f00ed8f
python : 3.8.6.final.0
python-bits : 64
OS : Darwin
OS-release : 20.3.0
Version : Darwin Kernel Version 20.3.0: Thu Jan 21 00:07:06 PST 2021; root:xnu-7195.81.3~1/RELEASE_X86_64
machine : x86_64
processor : i386
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.3.0
numpy : 1.20.3
pytz : 2021.1
dateutil : 2.8.1
pip : 21.0.1
setuptools : 57.0.0
Cython : 3.0a6
pytest : 6.2.4
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.0.1
IPython : 7.23.1
pandas_datareader: None
bs4 : None
bottleneck : None
fsspec : 2021.06.0
fastparquet : None
gcsfs : None
matplotlib : 3.4.2
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pyxlsb : None
s3fs : 2021.06.0
scipy : 1.6.3
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
xlwt : None
numba : None