From 5af419c60c7c7154e53440778323a2532d0ddef6 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 9 Jun 2021 18:17:20 -0700 Subject: [PATCH 1/2] REF: de-duplicate _intersection, _difference --- pandas/core/indexes/base.py | 6 +++- pandas/core/indexes/multi.py | 35 +++-------------------- pandas/tests/indexes/multi/test_setops.py | 7 ++--- 3 files changed, 12 insertions(+), 36 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fcf576efb73ab..2e201384f8f2a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3091,7 +3091,7 @@ def intersection(self, other, sort=False): return this.intersection(other, sort=sort) result = self._intersection(other, sort=sort) - return self._wrap_setop_result(other, result) + return self._wrap_intersection_result(other, result) def _intersection(self, other: Index, sort=False): """ @@ -3113,6 +3113,10 @@ def _intersection(self, other: Index, sort=False): res_values = _maybe_try_sort(res_values, sort) return res_values + def _wrap_intersection_result(self, other, result): + # We will override for MultiIndex to handle empty results + return self._wrap_setop_result(other, result) + def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike: """ Find the intersection of two Indexes using get_indexer. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0876847aed84f..2abe1f84cabec 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3597,27 +3597,10 @@ def _maybe_match_names(self, other): names.append(None) return names - def _intersection(self, other, sort=False) -> MultiIndex: + def _wrap_intersection_result(self, other, result): other, result_names = self._convert_can_do_setop(other) - other = other.astype(object, copy=False) - uniq_tuples = None # flag whether _inner_indexer was successful - if self.is_monotonic and other.is_monotonic: - try: - inner_tuples = self._inner_indexer(other)[0] - sort = False # inner_tuples is already sorted - except TypeError: - pass - else: - uniq_tuples = algos.unique(inner_tuples) - - if uniq_tuples is None: - uniq_tuples = self._intersection_via_get_indexer(other, sort) - - if sort is None: - uniq_tuples = sorted(uniq_tuples) - - if len(uniq_tuples) == 0: + if len(result) == 0: return MultiIndex( levels=self.levels, codes=[[]] * self.nlevels, @@ -3625,22 +3608,12 @@ def _intersection(self, other, sort=False) -> MultiIndex: verify_integrity=False, ) else: - return MultiIndex.from_arrays( - zip(*uniq_tuples), sortorder=0, names=result_names - ) + return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names) def _difference(self, other, sort) -> MultiIndex: other, result_names = self._convert_can_do_setop(other) - this = self._get_unique_index() - - indexer = this.get_indexer(other) - indexer = indexer.take((indexer != -1).nonzero()[0]) - - label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) - difference = this._values.take(label_diff) - if sort is None: - difference = sorted(difference) + difference = super()._difference(other, sort) if len(difference) == 0: return MultiIndex( diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index eb456bee39dbf..f43e3104c64d7 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -216,11 +216,10 @@ def test_difference_sort_incomparable(): other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) # sort=None, the default - # MultiIndex.difference deviates here from other difference - # implementations in not catching the TypeError - msg = "'<' not supported between instances of 'Timestamp' and 'int'" - with pytest.raises(TypeError, match=msg): + msg = "sort order is undefined for incomparable objects" + with tm.assert_produces_warning(RuntimeWarning, match=msg): result = idx.difference(other) + tm.assert_index_equal(result, idx) # sort=False result = idx.difference(other, sort=False) From b59362da381f0d824937cadea5e66ab68dac41dd Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Jun 2021 16:24:57 -0700 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1e9921c357ef6..918ef93efab11 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -1011,6 +1011,7 @@ MultiIndex - Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in the result (:issue:`38623`) - Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when the :class:`MultiIndex` contained ``NaN`` even when they are differently ordered (:issue:`38439`) - Bug in :meth:`MultiIndex.intersection` always returning an empty result when intersecting with :class:`CategoricalIndex` (:issue:`38653`) +- Bug in :meth:`MultiIndex.difference` incorrectly raising ``TypeError`` when indexes contain non-sortable entries (:issue:`41915`) - Bug in :meth:`MultiIndex.reindex` raising a ``ValueError`` when used on an empty :class:`MultiIndex` and indexing only a specific level (:issue:`41170`) - Bug in :meth:`MultiIndex.reindex` raising ``TypeError`` when reindexing against a flat :class:`Index` (:issue:`41707`)