diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e57166f7a4861..786bdd502fb1b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1024,6 +1024,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` with ``sort=False`` results in sorted index (:issue:`17041`) - Bug in :meth:`concat` when ``axis=1`` and ``sort=False`` where the resulting Index was a :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`46675`) - Bug in :meth:`wide_to_long` raises when ``stubnames`` is missing in columns and ``i`` contains string dtype column (:issue:`46044`) +- Bug in :meth:`DataFrame.join` with categorical index results in unexpected reordering (:issue:`47812`) Sparse ^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a212da050e1f1..239e6656ea151 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4682,6 +4682,7 @@ def join( not isinstance(self, ABCMultiIndex) or not any(is_categorical_dtype(dtype) for dtype in self.dtypes) ) + and not is_categorical_dtype(self.dtype) ): # Categorical is monotonic if data are ordered as categories, but join can # not handle this in case of not lexicographically monotonic GH#38502 diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 905c2af2d22a5..d97c6a3dacdc3 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -712,6 +712,21 @@ def test_join_datetime_string(self): ) tm.assert_frame_equal(result, expected) + def test_join_with_categorical_index(self): + # GH47812 + ix = ["a", "b"] + id1 = pd.CategoricalIndex(ix, categories=ix) + id2 = pd.CategoricalIndex(reversed(ix), categories=reversed(ix)) + + df1 = DataFrame({"c1": ix}, index=id1) + df2 = DataFrame({"c2": reversed(ix)}, index=id2) + result = df1.join(df2) + expected = DataFrame( + {"c1": ["a", "b"], "c2": ["a", "b"]}, + index=pd.CategoricalIndex(["a", "b"], categories=["a", "b"]), + ) + tm.assert_frame_equal(result, expected) + def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix="_y"):