diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index bdeb9c48990a2..24e1f641c7925 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -500,6 +500,7 @@ Performance improvements - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) - :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`) - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`) +- Performance improvement in :class:`MultiIndex` when setting :attr:`MultiIndex.names` doesn't invalidate all cached operations (:issue:`59578`) - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`) - Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c3d4ad721c830..166ddc9524a9e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -799,7 +799,7 @@ def dtypes(self) -> Series: """ from pandas import Series - names = com.fill_missing_names([level.name for level in self.levels]) + names = com.fill_missing_names(self.names) return Series([level.dtype for level in self.levels], index=Index(names)) def __len__(self) -> int: @@ -1572,7 +1572,7 @@ def _format_multi( def _get_names(self) -> FrozenList: return FrozenList(self._names) - def _set_names(self, names, *, level=None, validate: bool = True) -> None: + def _set_names(self, names, *, level=None) -> None: """ Set new names on index. Each name has to be a hashable type. 
@@ -1583,8 +1583,6 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
         level : int, level name, or sequence of int/level names (default None)
             If the index is a MultiIndex (hierarchical), level(s) to set (None
             for all levels). Otherwise level must be None
-        validate : bool, default True
-            validate that the names match level lengths
 
         Raises
         ------
@@ -1603,13 +1601,12 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
             raise ValueError("Names should be list-like for a MultiIndex")
         names = list(names)
 
-        if validate:
-            if level is not None and len(names) != len(level):
-                raise ValueError("Length of names must match length of level.")
-            if level is None and len(names) != self.nlevels:
-                raise ValueError(
-                    "Length of names must match number of levels in MultiIndex."
-                )
+        if level is not None and len(names) != len(level):
+            raise ValueError("Length of names must match length of level.")
+        if level is None and len(names) != self.nlevels:
+            raise ValueError(
+                "Length of names must match number of levels in MultiIndex."
+            )
 
         if level is None:
             level = range(self.nlevels)
@@ -1627,8 +1624,11 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
                     )
             self._names[lev] = name
 
-        # If .levels has been accessed, the names in our cache will be stale.
-        self._reset_cache()
+        # Setting names only invalidates cached values that embed the names:
+        # - "levels": the .name of each cached level would be stale.
+        # - "dtypes": the cached Series is indexed by the level names.
+        self._reset_cache("levels")
+        self._reset_cache("dtypes")
 
     names = property(
         fset=_set_names,
diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py
index 43aec12055cd2..c7ed21a2cc001 100644
--- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py
@@ -5,6 +5,7 @@
 from pandas import (
     DataFrame,
     MultiIndex,
+    RangeIndex,
     Series,
 )
 import pandas._testing as tm
@@ -68,3 +69,23 @@ def test_indexer_caching(monkeypatch):
     s[s == 0] = 1
     expected = Series(np.ones(size_cutoff), index=index)
     tm.assert_series_equal(s, expected)
+
+
+def test_set_names_only_clears_level_cache():
+    # Setting names must drop only the cached values that embed the level
+    # names ("levels" and "dtypes"); unrelated entries stay cached.
+    mi = MultiIndex.from_arrays([range(4), range(4)], names=["a", "b"])
+    mi.dtypes
+    mi.is_monotonic_increasing
+    mi._engine
+    mi.levels
+    old_cache_keys = sorted(mi._cache.keys())
+    assert old_cache_keys == ["_engine", "dtypes", "is_monotonic_increasing", "levels"]
+    mi.names = ["A", "B"]
+    new_cache_keys = sorted(mi._cache.keys())
+    assert new_cache_keys == ["_engine", "is_monotonic_increasing"]
+    new_levels = mi.levels
+    tm.assert_index_equal(new_levels[0], RangeIndex(4, name="A"))
+    tm.assert_index_equal(new_levels[1], RangeIndex(4, name="B"))
+    # dtypes is indexed by the level names, so it must reflect the rename.
+    assert list(mi.dtypes.index) == ["A", "B"]