diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 414794dd6a56e..1e0c6a7ad9222 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -1035,6 +1035,7 @@ MultiIndex - Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in the result (:issue:`38623`) - Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when the :class:`MultiIndex` contained ``NaN`` even when they are differently ordered (:issue:`38439`) - Bug in :meth:`MultiIndex.intersection` always returning an empty result when intersecting with :class:`CategoricalIndex` (:issue:`38653`) +- Bug in :meth:`MultiIndex.difference` incorrectly raising ``TypeError`` when indexes contain non-sortable entries (:issue:`41915`) - Bug in :meth:`MultiIndex.reindex` raising a ``ValueError`` when used on an empty :class:`MultiIndex` and indexing only a specific level (:issue:`41170`) - Bug in :meth:`MultiIndex.reindex` raising ``TypeError`` when reindexing against a flat :class:`Index` (:issue:`41707`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 96ddf9ce76f53..dc3e1f9874a44 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3091,7 +3091,7 @@ def intersection(self, other, sort=False): return this.intersection(other, sort=sort) result = self._intersection(other, sort=sort) - return self._wrap_setop_result(other, result) + return self._wrap_intersection_result(other, result) def _intersection(self, other: Index, sort=False): """ @@ -3113,6 +3113,10 @@ def _intersection(self, other: Index, sort=False): res_values = _maybe_try_sort(res_values, sort) return res_values + def _wrap_intersection_result(self, other, result): + # We will override for MultiIndex to handle empty results + return self._wrap_setop_result(other, result) + def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike: """ Find the intersection of two Indexes using get_indexer. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4dff63ea22e00..f4d8de12718af 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3588,27 +3588,10 @@ def _maybe_match_names(self, other): names.append(None) return names - def _intersection(self, other, sort=False) -> MultiIndex: + def _wrap_intersection_result(self, other, result): other, result_names = self._convert_can_do_setop(other) - other = other.astype(object, copy=False) - uniq_tuples = None # flag whether _inner_indexer was successful - if self.is_monotonic and other.is_monotonic: - try: - inner_tuples = self._inner_indexer(other)[0] - sort = False # inner_tuples is already sorted - except TypeError: - pass - else: - uniq_tuples = algos.unique(inner_tuples) - - if uniq_tuples is None: - uniq_tuples = self._intersection_via_get_indexer(other, sort) - - if sort is None: - uniq_tuples = sorted(uniq_tuples) - - if len(uniq_tuples) == 0: + if len(result) == 0: return MultiIndex( levels=self.levels, codes=[[]] * self.nlevels, @@ -3616,22 +3599,12 @@ def _intersection(self, other, sort=False) -> MultiIndex: verify_integrity=False, ) else: - return MultiIndex.from_arrays( - zip(*uniq_tuples), sortorder=0, names=result_names - ) + return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names) def _difference(self, other, sort) -> MultiIndex: other, result_names = self._convert_can_do_setop(other) - this = self._get_unique_index() - - indexer = this.get_indexer(other) - indexer = indexer.take((indexer != -1).nonzero()[0]) - - label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) - difference = this._values.take(label_diff) - if sort is None: - difference = sorted(difference) + difference = super()._difference(other, sort) if len(difference) == 0: return MultiIndex( diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index eb456bee39dbf..f43e3104c64d7 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -216,11 +216,10 @@ def test_difference_sort_incomparable(): other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) # sort=None, the default - # MultiIndex.difference deviates here from other difference - # implementations in not catching the TypeError - msg = "'<' not supported between instances of 'Timestamp' and 'int'" - with pytest.raises(TypeError, match=msg): + msg = "sort order is undefined for incomparable objects" + with tm.assert_produces_warning(RuntimeWarning, match=msg): result = idx.difference(other) + tm.assert_index_equal(result, idx) # sort=False result = idx.difference(other, sort=False)