Description
Code Sample, a copy-pastable example if possible
In [2]: df = pd.DataFrame([[2, 1], [4, (1,2)]]).set_index([0, 1])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/home/pietro/nobackup/repo/pandas/pandas/core/categorical.py in __init__(self, values, categories, ordered, name, fastpath)
288 try:
--> 289 codes, categories = factorize(values, sort=True)
290 except TypeError:
/home/pietro/nobackup/repo/pandas/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint)
360 uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
--> 361 assume_unique=True)
362
/home/pietro/nobackup/repo/pandas/pandas/core/algorithms.py in safe_sort(values, labels, na_sentinel, assume_unique)
258 # unorderable in py3 if mixed str/int
--> 259 ordered = sort_mixed(values)
260 else:
/home/pietro/nobackup/repo/pandas/pandas/core/algorithms.py in sort_mixed(values)
251 dtype=bool)
--> 252 nums = np.sort(values[~str_pos])
253 strs = np.sort(values[str_pos])
/usr/lib/python3/dist-packages/numpy/core/fromnumeric.py in sort(a, axis, kind, order)
821 a = asanyarray(a).copy(order="K")
--> 822 a.sort(axis=axis, kind=kind, order=order)
823 return a
TypeError: unorderable types: tuple() > int()
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-2-b560307721b0> in <module>()
----> 1 df = pd.DataFrame([[2, 1], [4, (1,2)]]).set_index([0, 1])
/home/pietro/nobackup/repo/pandas/pandas/core/frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
2907 arrays.append(level)
2908
-> 2909 index = MultiIndex.from_arrays(arrays, names=names)
2910
2911 if verify_integrity and not index.is_unique:
/home/pietro/nobackup/repo/pandas/pandas/indexes/multi.py in from_arrays(cls, arrays, sortorder, names)
1085 from pandas.core.categorical import _factorize_from_iterables
1086
-> 1087 labels, levels = _factorize_from_iterables(arrays)
1088 if names is None:
1089 names = [getattr(arr, "name", None) for arr in arrays]
/home/pietro/nobackup/repo/pandas/pandas/core/categorical.py in _factorize_from_iterables(iterables)
2082 # For consistency, it should return a list of 2 lists.
2083 return [[], []]
-> 2084 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
/home/pietro/nobackup/repo/pandas/pandas/core/categorical.py in <listcomp>(.0)
2082 # For consistency, it should return a list of 2 lists.
2083 return [[], []]
-> 2084 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
/home/pietro/nobackup/repo/pandas/pandas/core/categorical.py in _factorize_from_iterable(values)
2054 codes = values.codes
2055 else:
-> 2056 cat = Categorical(values, ordered=True)
2057 categories = cat.categories
2058 codes = cat.codes
/home/pietro/nobackup/repo/pandas/pandas/core/categorical.py in __init__(self, values, categories, ordered, name, fastpath)
293 # raise, as we don't have a sortable data structure and so
294 # the user should give us one by specifying categories
--> 295 raise TypeError("'values' is not ordered, please "
296 "explicitly specify the categories order "
297 "by passing in a categories argument.")
TypeError: 'values' is not ordered, please explicitly specify the categories order by passing in a categories argument.
Problem description
I would understand the "unorderable types" error - after all, I'm asking to have different types in the index, and pandas may want to compare them, and it can't (this error obviously only affects Python 3). But then,
In [3]: pd.DataFrame([[2, 'a'], [4, (1,2)]]).set_index([0, 1])
Out[3]:
Empty DataFrame
Columns: []
Index: [(2, a), (4, (1, 2))]
works fine (even though 'a' > (1, 2) raises TypeError), and even allows me to sort the resulting index! Moreover, as I understand it, this should be supported.
Expected Output
Empty DataFrame
Columns: []
Index: [(2, 1), (4, (1, 2))]
Output of pd.show_versions()
pandas: 0.19.0+473.gf65a641
pytest: 3.0.6
pip: 8.1.2
setuptools: 28.0.0
Cython: 0.23.4
numpy: 1.12.0
scipy: 0.18.1
xarray: None
IPython: 5.1.0.dev
sphinx: 1.4.8
patsy: 0.3.0-dev
dateutil: 2.5.3
pytz: 2015.7
blosc: None
bottleneck: 1.2.0
tables: 3.2.2
numexpr: 2.6.0
feather: None
matplotlib: 2.0.0rc2
openpyxl: 2.3.0
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.3
lxml: 3.6.4
bs4: 4.5.1
html5lib: 0.999
httplib2: 0.9.1
apiclient: 1.5.2
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.8
s3fs: None
pandas_datareader: 0.2.1