diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 794a7025fe218..7def8f6bdb2bc 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -780,6 +780,7 @@ Indexing - Bug in :meth:`DatetimeIndex.insert` when inserting ``np.datetime64("NaT")`` into a timezone-aware index incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`) - Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`) - Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`) +- Bug in :meth:`RangeIndex.astype` where, when converting to :class:`CategoricalIndex`, the categories became an :class:`Int64Index` instead of a :class:`RangeIndex` (:issue:`41263`) - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`) - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`) - Bug in :meth:`DataFrame.__setitem__` and :meth:`DataFrame.iloc.__setitem__` raising ``ValueError`` when trying to index with a row-slice and setting a list as values (:issue:`40440`) @@ -945,6 +946,7 @@ Other - Bug in :meth:`Series.where` with numeric dtype and ``other = None`` not casting to ``nan`` (:issue:`39761`) - :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. 
``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`) - Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) +- Bug in :func:`pandas.testing.assert_index_equal` with ``exact=True`` not raising when comparing :class:`CategoricalIndex` instances with ``Int64Index`` and ``RangeIndex`` categories (:issue:`41263`) - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`) - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 912039b7571bc..2d695458e32e6 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -309,18 +309,22 @@ def assert_index_equal( __tracebackhide__ = True def _check_types(left, right, obj="Index"): - if exact: - assert_class_equal(left, right, exact=exact, obj=obj) + if not exact: + return - # Skip exact dtype checking when `check_categorical` is False - if check_categorical: - assert_attr_equal("dtype", left, right, obj=obj) + assert_class_equal(left, right, exact=exact, obj=obj) - # allow string-like to have different inferred_types - if left.inferred_type in ("string"): - assert right.inferred_type in ("string") - else: - assert_attr_equal("inferred_type", left, right, obj=obj) + # Skip exact dtype checking when `check_categorical` is False + if check_categorical: + assert_attr_equal("dtype", left, right, obj=obj) + if is_categorical_dtype(left.dtype) and is_categorical_dtype(right.dtype): + 
assert_index_equal(left.categories, right.categories, exact=exact) + + # allow string-like to have different inferred_types + if left.inferred_type in ("string"): + assert right.inferred_type in ("string") + else: + assert_attr_equal("inferred_type", left, right, obj=obj) def _get_ilevel_values(index, level): # accept level number only diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7779335bfd3ba..6f414c91ce94c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -907,9 +907,7 @@ def astype(self, dtype, copy=True): elif is_categorical_dtype(dtype): from pandas.core.indexes.category import CategoricalIndex - return CategoricalIndex( - self._values, name=self.name, dtype=dtype, copy=copy - ) + return CategoricalIndex(self, name=self.name, dtype=dtype, copy=copy) elif is_extension_array_dtype(dtype): return Index(np.asarray(self), name=self.name, dtype=dtype, copy=copy) diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py index 2acf79ee0bced..35620875d5a1a 100644 --- a/pandas/tests/indexes/categorical/test_constructors.py +++ b/pandas/tests/indexes/categorical/test_constructors.py @@ -108,8 +108,8 @@ def test_construction_with_dtype(self): tm.assert_index_equal(result, ci, exact=True) # make sure indexes are handled - expected = CategoricalIndex([0, 1, 2], categories=[0, 1, 2], ordered=True) idx = Index(range(3)) + expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True) result = CategoricalIndex(idx, categories=idx, ordered=True) tm.assert_index_equal(result, expected, exact=True) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 6139d8af48d98..8bbe8f9b9e0e2 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -667,20 +667,20 @@ def test_astype_category(self, copy, name, ordered, simple_index): # standard categories dtype = CategoricalDtype(ordered=ordered) result = 
idx.astype(dtype, copy=copy) - expected = CategoricalIndex(idx.values, name=name, ordered=ordered) - tm.assert_index_equal(result, expected) + expected = CategoricalIndex(idx, name=name, ordered=ordered) + tm.assert_index_equal(result, expected, exact=True) # non-standard categories dtype = CategoricalDtype(idx.unique().tolist()[:-1], ordered) result = idx.astype(dtype, copy=copy) - expected = CategoricalIndex(idx.values, name=name, dtype=dtype) - tm.assert_index_equal(result, expected) + expected = CategoricalIndex(idx, name=name, dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) if ordered is False: # dtype='category' defaults to ordered=False, so only test once result = idx.astype("category", copy=copy) - expected = CategoricalIndex(idx.values, name=name) - tm.assert_index_equal(result, expected) + expected = CategoricalIndex(idx, name=name) + tm.assert_index_equal(result, expected, exact=True) def test_is_unique(self, simple_index): # initialize a unique index diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 82a3a223b442b..1778b6fb9d832 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -3,9 +3,11 @@ from pandas import ( Categorical, + CategoricalIndex, Index, MultiIndex, NaT, + RangeIndex, ) import pandas._testing as tm @@ -199,6 +201,28 @@ def test_index_equal_category_mismatch(check_categorical): tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) +@pytest.mark.parametrize("exact", [False, True]) +def test_index_equal_range_categories(check_categorical, exact): + # GH41263 + msg = """\ +Index are different + +Index classes are different +\\[left\\]: RangeIndex\\(start=0, stop=10, step=1\\) +\\[right\\]: Int64Index\\(\\[0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\], dtype='int64'\\)""" + + rcat = CategoricalIndex(RangeIndex(10)) + icat = CategoricalIndex(list(range(10))) + + if check_categorical and exact: + with 
pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(rcat, icat, check_categorical=True, exact=True) + else: + tm.assert_index_equal( + rcat, icat, check_categorical=check_categorical, exact=exact + ) + + def test_assert_index_equal_mixed_dtype(): # GH#39168 idx = Index(["foo", "bar", 42])