Skip to content

Commit 32abe63

Browse files
authored
REF/PERF: Move MultiIndex._tuples to MultiIndex._cache (#35641)
1 parent df1d440 commit 32abe63

File tree

3 files changed

+33
-24
lines changed

3 files changed

+33
-24
lines changed

pandas/core/indexes/multi.py

Lines changed: 7 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,6 @@ class MultiIndex(Index):
243243
_comparables = ["names"]
244244
rename = Index.set_names
245245

246-
_tuples = None
247246
sortorder: Optional[int]
248247

249248
# --------------------------------------------------------------------
@@ -634,16 +633,9 @@ def from_frame(cls, df, sortorder=None, names=None):
634633

635634
# --------------------------------------------------------------------
636635

637-
@property
636+
@cache_readonly
638637
def _values(self):
639638
# We override here, since our parent uses _data, which we don't use.
640-
return self.values
641-
642-
@property
643-
def values(self):
644-
if self._tuples is not None:
645-
return self._tuples
646-
647639
values = []
648640

649641
for i in range(self.nlevels):
@@ -657,8 +649,12 @@ def values(self):
657649
vals = np.array(vals, copy=False)
658650
values.append(vals)
659651

660-
self._tuples = lib.fast_zip(values)
661-
return self._tuples
652+
arr = lib.fast_zip(values)
653+
return arr
654+
655+
@property
656+
def values(self):
657+
return self._values
662658

663659
@property
664660
def array(self):
@@ -737,7 +733,6 @@ def _set_levels(
737733
if any(names):
738734
self._set_names(names)
739735

740-
self._tuples = None
741736
self._reset_cache()
742737

743738
def set_levels(self, levels, level=None, inplace=None, verify_integrity=True):
@@ -906,7 +901,6 @@ def _set_codes(
906901

907902
self._codes = new_codes
908903

909-
self._tuples = None
910904
self._reset_cache()
911905

912906
def set_codes(self, codes, level=None, inplace=None, verify_integrity=True):

pandas/io/pytables.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -320,6 +320,10 @@ def read_hdf(
320320
mode : {'r', 'r+', 'a'}, default 'r'
321321
Mode to use when opening the file. Ignored if path_or_buf is a
322322
:class:`pandas.HDFStore`. Default is 'r'.
323+
errors : str, default 'strict'
324+
Specifies how encoding and decoding errors are to be handled.
325+
See the errors argument for :func:`open` for a full list
326+
of options.
323327
where : list, optional
324328
A list of Term (or convertible) objects.
325329
start : int, optional
@@ -332,10 +336,6 @@ def read_hdf(
332336
Return an iterator object.
333337
chunksize : int, optional
334338
Number of rows to include in an iteration when using an iterator.
335-
errors : str, default 'strict'
336-
Specifies how encoding and decoding errors are to be handled.
337-
See the errors argument for :func:`open` for a full list
338-
of options.
339339
**kwargs
340340
Additional keyword arguments passed to HDFStore.
341341

pandas/tests/indexes/multi/test_compat.py

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -68,24 +68,33 @@ def test_inplace_mutation_resets_values():
6868

6969
mi1 = MultiIndex(levels=levels, codes=codes)
7070
mi2 = MultiIndex(levels=levels2, codes=codes)
71+
72+
# instantiating MultiIndex should not access/cache _.values
73+
assert "_values" not in mi1._cache
74+
assert "_values" not in mi2._cache
75+
7176
vals = mi1.values.copy()
7277
vals2 = mi2.values.copy()
7378

74-
assert mi1._tuples is not None
79+
# accessing .values should cache ._values
80+
assert mi1._values is mi1._cache["_values"]
81+
assert mi1.values is mi1._cache["_values"]
82+
assert isinstance(mi1._cache["_values"], np.ndarray)
7583

7684
# Make sure level setting works
7785
new_vals = mi1.set_levels(levels2).values
7886
tm.assert_almost_equal(vals2, new_vals)
7987

80-
# Non-inplace doesn't kill _tuples [implementation detail]
81-
tm.assert_almost_equal(mi1._tuples, vals)
88+
# Non-inplace doesn't drop _values from _cache [implementation detail]
89+
tm.assert_almost_equal(mi1._cache["_values"], vals)
8290

8391
# ...and values is still same too
8492
tm.assert_almost_equal(mi1.values, vals)
8593

86-
# Inplace should kill _tuples
94+
# Inplace should drop _values from _cache
8795
with tm.assert_produces_warning(FutureWarning):
8896
mi1.set_levels(levels2, inplace=True)
97+
assert "_values" not in mi1._cache
8998
tm.assert_almost_equal(mi1.values, vals2)
9099

91100
# Make sure label setting works too
@@ -95,18 +104,24 @@ def test_inplace_mutation_resets_values():
95104

96105
# Must be 1d array of tuples
97106
assert exp_values.shape == (6,)
98-
new_values = mi2.set_codes(codes2).values
107+
108+
new_mi = mi2.set_codes(codes2)
109+
assert "_values" not in new_mi._cache
110+
new_values = new_mi.values
111+
assert "_values" in new_mi._cache
99112

100113
# Not inplace shouldn't change
101-
tm.assert_almost_equal(mi2._tuples, vals2)
114+
tm.assert_almost_equal(mi2._cache["_values"], vals2)
102115

103116
# Should have correct values
104117
tm.assert_almost_equal(exp_values, new_values)
105118

106-
# ...and again setting inplace should kill _tuples, etc
119+
# ...and again setting inplace should drop _values from _cache, etc
107120
with tm.assert_produces_warning(FutureWarning):
108121
mi2.set_codes(codes2, inplace=True)
122+
assert "_values" not in mi2._cache
109123
tm.assert_almost_equal(mi2.values, new_values)
124+
assert "_values" in mi2._cache
110125

111126

112127
def test_ndarray_compat_properties(idx, compat_props):

0 commit comments

Comments
 (0)