Skip to content

REF/PERF: Move MultiIndex._tuples to MultiIndex._cache #35641

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 7 additions & 13 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ class MultiIndex(Index):
_comparables = ["names"]
rename = Index.set_names

_tuples = None
sortorder: Optional[int]

# --------------------------------------------------------------------
Expand Down Expand Up @@ -634,16 +633,9 @@ def from_frame(cls, df, sortorder=None, names=None):

# --------------------------------------------------------------------

@property
@cache_readonly
def _values(self):
# We override here, since our parent uses _data, which we don't use.
return self.values

@property
def values(self):
if self._tuples is not None:
return self._tuples

values = []

for i in range(self.nlevels):
Expand All @@ -657,8 +649,12 @@ def values(self):
vals = np.array(vals, copy=False)
values.append(vals)

self._tuples = lib.fast_zip(values)
return self._tuples
arr = lib.fast_zip(values)
return arr

@property
def values(self):
return self._values

@property
def array(self):
Expand Down Expand Up @@ -737,7 +733,6 @@ def _set_levels(
if any(names):
self._set_names(names)

self._tuples = None
self._reset_cache()

def set_levels(self, levels, level=None, inplace=None, verify_integrity=True):
Expand Down Expand Up @@ -906,7 +901,6 @@ def _set_codes(

self._codes = new_codes

self._tuples = None
self._reset_cache()

def set_codes(self, codes, level=None, inplace=None, verify_integrity=True):
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,10 @@ def read_hdf(
mode : {'r', 'r+', 'a'}, default 'r'
Mode to use when opening the file. Ignored if path_or_buf is a
:class:`pandas.HDFStore`. Default is 'r'.
errors : str, default 'strict'
Specifies how encoding and decoding errors are to be handled.
See the errors argument for :func:`open` for a full list
of options.
where : list, optional
A list of Term (or convertible) objects.
start : int, optional
Expand All @@ -332,10 +336,6 @@ def read_hdf(
Return an iterator object.
chunksize : int, optional
Number of rows to include in an iteration when using an iterator.
errors : str, default 'strict'
Specifies how encoding and decoding errors are to be handled.
See the errors argument for :func:`open` for a full list
of options.
Copy link
Contributor Author

@topper-123 topper-123 Aug 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CI complained that this docstring entry is in a different order than in the signature, so I moved it to come after `mode`, as in the signature, which must be the correct order.

I have no idea why this is failing here and not in other PRs...

**kwargs
Additional keyword arguments passed to HDFStore.

Expand Down
29 changes: 22 additions & 7 deletions pandas/tests/indexes/multi/test_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,24 +68,33 @@ def test_inplace_mutation_resets_values():

mi1 = MultiIndex(levels=levels, codes=codes)
mi2 = MultiIndex(levels=levels2, codes=codes)

# instantiating MultiIndex should not access/cache _.values
assert "_values" not in mi1._cache
assert "_values" not in mi2._cache

vals = mi1.values.copy()
vals2 = mi2.values.copy()

assert mi1._tuples is not None
# accessing .values should cache ._values
assert mi1._values is mi1._cache["_values"]
assert mi1.values is mi1._cache["_values"]
assert isinstance(mi1._cache["_values"], np.ndarray)

# Make sure level setting works
new_vals = mi1.set_levels(levels2).values
tm.assert_almost_equal(vals2, new_vals)

# Non-inplace doesn't kill _tuples [implementation detail]
tm.assert_almost_equal(mi1._tuples, vals)
# Non-inplace doesn't drop _values from _cache [implementation detail]
tm.assert_almost_equal(mi1._cache["_values"], vals)

# ...and values is still same too
tm.assert_almost_equal(mi1.values, vals)

# Inplace should kill _tuples
# Inplace should drop _values from _cache
with tm.assert_produces_warning(FutureWarning):
mi1.set_levels(levels2, inplace=True)
assert "_values" not in mi1._cache
tm.assert_almost_equal(mi1.values, vals2)

# Make sure label setting works too
Expand All @@ -95,18 +104,24 @@ def test_inplace_mutation_resets_values():

# Must be 1d array of tuples
assert exp_values.shape == (6,)
new_values = mi2.set_codes(codes2).values

new_mi = mi2.set_codes(codes2)
assert "_values" not in new_mi._cache
new_values = new_mi.values
assert "_values" in new_mi._cache

# Not inplace shouldn't change
tm.assert_almost_equal(mi2._tuples, vals2)
tm.assert_almost_equal(mi2._cache["_values"], vals2)

# Should have correct values
tm.assert_almost_equal(exp_values, new_values)

# ...and again setting inplace should kill _tuples, etc
# ...and again setting inplace should drop _values from _cache, etc
with tm.assert_produces_warning(FutureWarning):
mi2.set_codes(codes2, inplace=True)
assert "_values" not in mi2._cache
tm.assert_almost_equal(mi2.values, new_values)
assert "_values" in mi2._cache


def test_ndarray_compat_properties(idx, compat_props):
Expand Down