BUG: TypeError: '<' not supported between instances of 'float' and 'str' when using `.combine_first` with categorical index containing nan

- [x] I have checked that this issue has not already been reported.

- [x] I have confirmed this bug exists on the latest version of pandas.

- [x] (optional) I have confirmed this bug exists on the master branch of pandas.

---

#### Code Sample, a copy-pastable example

```python
In [79]: x = ['b', 'b', 'c', 'a', 'b', np.nan]
    ...: y = ['a', 'b', 'c', 'a', 'b', 'd']
    ...: mi1 = pd.MultiIndex.from_arrays(
    ...:     [x, [1, 2, 3, 4, 5, 6]],
    ...:     names=['a', 'b']
    ...: )
    ...: df = pd.DataFrame({'c': [1, 1, 1, 1, 1, 1]}, index=mi1)
    ...: mi2 = pd.MultiIndex.from_arrays(
    ...:     [y, [1, 1, 1, 1, 1, 1]],
    ...:     names=['a', 'b']
    ...: )
    ...: s = pd.Series([1, 2, 3, 4, 5, 6], index=mi2)
    ...: df.combine_first(pd.DataFrame({'some_col': s}))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/algorithms.py in safe_sort(values, codes, na_sentinel, assume_unique, verify)
   2060         try:
-> 2061             sorter = values.argsort()
   2062             ordered = values.take(sorter)

TypeError: '<' not supported between instances of 'float' and 'str'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-79-de018ddfae29> in <module>
     11 )
     12 s = pd.Series([1, 2, 3, 4, 5, 6], index=mi2)
---> 13 df.combine_first(pd.DataFrame({'some_col': s}))

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/frame.py in combine_first(self, other)
   6239             return expressions.where(mask, y_values, x_values)
   6240 
-> 6241         return self.combine(other, combiner, overwrite=False)
   6242 
   6243     def update(

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/frame.py in combine(self, other, func, fill_value, overwrite)
   6104         other_idxlen = len(other.index)  # save for compare
   6105 
-> 6106         this, other = self.align(other, copy=False)
   6107         new_index = this.index
   6108 

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/frame.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
   3955         broadcast_axis=None,
   3956     ) -> "DataFrame":
-> 3957         return super().align(
   3958             other,
   3959             join=join,

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/generic.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
   8542             axis = self._get_axis_number(axis)
   8543         if isinstance(other, ABCDataFrame):
-> 8544             return self._align_frame(
   8545                 other,
   8546                 join=join,

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/generic.py in _align_frame(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis)
   8589         if axis is None or axis == 0:
   8590             if not self.index.equals(other.index):
-> 8591                 join_index, ilidx, iridx = self.index.join(
   8592                     other.index, how=join, level=level, return_indexers=True
   8593                 )

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/indexes/base.py in join(self, other, how, level, return_indexers, sort)
   3491                 )
   3492             else:
-> 3493                 return self._join_non_unique(
   3494                     other, how=how, return_indexers=return_indexers
   3495                 )

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/indexes/base.py in _join_non_unique(self, other, how, return_indexers)
   3618         rvalues = other._get_engine_target()
   3619 
-> 3620         left_idx, right_idx = _get_join_indexers(
   3621             [lvalues], [rvalues], how=how, sort=True
   3622         )

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
   1326         for n in range(len(left_keys))
   1327     )
-> 1328     zipped = zip(*mapped)
   1329     llab, rlab, shape = [list(x) for x in zipped]
   1330 

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/reshape/merge.py in <genexpr>(.0)
   1323     # get left & right join labels and num. of levels at each location
   1324     mapped = (
-> 1325         _factorize_keys(left_keys[n], right_keys[n], sort=sort, how=how)
   1326         for n in range(len(left_keys))
   1327     )

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _factorize_keys(lk, rk, sort, how)
   1978     if sort:
   1979         uniques = rizer.uniques.to_array()
-> 1980         llab, rlab = _sort_labels(uniques, llab, rlab)
   1981 
   1982     # NA group

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/reshape/merge.py in _sort_labels(uniques, left, right)
   2003     labels = np.concatenate([left, right])
   2004 
-> 2005     _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1)
   2006     new_labels = ensure_int64(new_labels)
   2007     new_left, new_right = new_labels[:llength], new_labels[llength:]

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/algorithms.py in safe_sort(values, codes, na_sentinel, assume_unique, verify)
   2063         except TypeError:
   2064             # try this anyway
-> 2065             ordered = sort_mixed(values)
   2066 
   2067     # codes:

~/envs/pandas-test/lib/python3.8/site-packages/pandas/core/algorithms.py in sort_mixed(values)
   2046         # order ints before strings, safe in py3
   2047         str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
-> 2048         nums = np.sort(values[~str_pos])
   2049         strs = np.sort(values[str_pos])
   2050         return np.concatenate([nums, np.asarray(strs, dtype=object)])

<__array_function__ internals> in sort(*args, **kwargs)

~/envs/pandas-test/lib/python3.8/site-packages/numpy/core/fromnumeric.py in sort(a, axis, kind, order)
    989     else:
    990         a = asanyarray(a).copy(order="K")
--> 991     a.sort(axis=axis, kind=kind, order=order)
    992     return a
    993 

TypeError: '<' not supported between instances of 'float' and 'str'
```

#### Problem description

I use `df.combine_first(...)` to add a column to a dataframe, extending the index when an index value does not already exist in the target dataframe. However, if a level of the MultiIndex of the dataframe/series contains a mix of `np.nan` and `str` values, the above `TypeError` is raised. I originally noticed this for categorical types (`x` and `y`), but the problem can be reduced to plain string types.

Whether the error reproduces also seems to depend on the length/order of `x` and `y`: when I tried to reduce the example to fewer elements while keeping the `np.nan`, the error no longer occurred.

#### Expected Output

No exception is raised, and the resulting dataframe contains the new column from the series.

#### Output of ``pd.show_versions()``

<details>

In [84]: pd.show_versions()

INSTALLED VERSIONS
------------------
commit           : 2a7d3326dee660824a8433ffd01065f8ac37f7d6
python           : 3.8.5.final.0
python-bits      : 64
OS               : Darwin
OS-release       : 19.6.0
Version          : Darwin Kernel Version 19.6.0: Thu Jun 18 20:49:00 PDT 2020; root:xnu-6153.141.1~1/RELEASE_X86_64
machine          : x86_64
processor        : i386
byteorder        : little
LC_ALL           : None
LANG             : en_AU.UTF-8
LOCALE           : en_AU.UTF-8

pandas           : 1.1.2
numpy            : 1.19.2
pytz             : 2020.1
dateutil         : 2.8.1
pip              : 20.1.1
setuptools       : 46.4.0
Cython           : None
pytest           : None
hypothesis       : None
sphinx           : None
blosc            : None
feather          : None
xlsxwriter       : None
lxml.etree       : None
html5lib         : None
pymysql          : None
psycopg2         : None
jinja2           : None
IPython          : 7.18.1
pandas_datareader: None
bs4              : None
bottleneck       : None
fsspec           : None
fastparquet      : None
gcsfs            : None
matplotlib       : None
numexpr          : None
odfpy            : None
openpyxl         : None
pandas_gbq       : None
pyarrow          : None
pytables         : None
pyxlsb           : None
s3fs             : None
scipy            : None
sqlalchemy       : None
tables           : None
tabulate         : None
xarray           : None
xlrd             : None
xlwt             : None
numba            : None

</details>


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

BUG: TypeError: '<' not supported between instances of 'float' and 'str' when using `.combine_first` with categorical index containing nan #36562

Code Sample, a copy-pastable example

Problem description

Expected Output

Output of `pd.show_versions()`

INSTALLED VERSIONS

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Uh oh!

BUG: TypeError: '<' not supported between instances of 'float' and 'str' when using .combine_first with categorical index containing nan #36562

Description

Code Sample, a copy-pastable example

Problem description

Expected Output

Output of pd.show_versions()

INSTALLED VERSIONS

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions

BUG: TypeError: '<' not supported between instances of 'float' and 'str' when using `.combine_first` with categorical index containing nan #36562

Output of `pd.show_versions()`