diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 8025087395919..38a1802340c69 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -681,7 +681,7 @@ Interval Indexing ^^^^^^^^ -- Bug in :meth:`Index.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`) +- Bug in :meth:`Index.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`) - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`) - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 28dfdc23eb76e..705a279638097 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2980,8 +2980,8 @@ def _union(self, other: Index, sort): # If objects are unorderable, we must have object dtype. return np.array(value_list, dtype=object) - elif not other.is_unique and not self.is_unique: - # self and other both have duplicates + elif not other.is_unique: + # other has duplicates # error: Argument 1 to "union_with_duplicates" has incompatible type # "Union[ExtensionArray, ndarray]"; expected "ndarray" @@ -2990,7 +2990,7 @@ def _union(self, other: Index, sort): result = algos.union_with_duplicates(lvals, rvals) # type: ignore[arg-type] return _maybe_try_sort(result, sort) - # Either other or self is not unique + # Self may have duplicates # find indexes of things in "other" that are not in "self" if self.is_unique: indexer = self.get_indexer(other) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 5cff23943b57d..3555d043659cf 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -547,7 +547,7 @@ def test_union_with_duplicate_index_and_non_monotonic(cls): result = a.union(b) tm.assert_index_equal(result, expected) - result = a.union(b) + result = b.union(a) tm.assert_index_equal(result, expected) @@ -579,6 +579,29 @@ def test_union_nan_in_both(dup): tm.assert_index_equal(result, expected) +@pytest.mark.parametrize( + "cls", + [ + Int64Index, + Float64Index, + DatetimeIndex, + TimedeltaIndex, + lambda x: Index(x, dtype=object), + ], +) +def test_union_with_duplicate_index_not_subset_and_non_monotonic(cls): + # GH#36289 + a = cls([1, 0, 2]) + b = cls([0, 0, 1]) + expected = cls([0, 0, 1, 2]) + + result = a.union(b) + tm.assert_index_equal(result, expected) + + result = b.union(a) + tm.assert_index_equal(result, expected) + + class TestSetOpsUnsorted: # These may eventually belong in a dtype-specific test_setops, or # parametrized over a more general fixture