From c9f4e67006513e4b9911c40d7846a22418b1cd88 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 9 Aug 2020 10:06:42 +0100 Subject: [PATCH 1/4] REF/PERF: move MultiIndex._tuples into _cache --- pandas/core/indexes/multi.py | 20 ++++++---------- pandas/tests/indexes/multi/test_compat.py | 29 +++++++++++++++-------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 13927dede5542..448c2dfe4a29d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -243,7 +243,6 @@ class MultiIndex(Index): _comparables = ["names"] rename = Index.set_names - _tuples = None sortorder: Optional[int] # -------------------------------------------------------------------- @@ -634,16 +633,9 @@ def from_frame(cls, df, sortorder=None, names=None): # -------------------------------------------------------------------- - @property + @cache_readonly def _values(self): # We override here, since our parent uses _data, which we don't use. - return self.values - - @property - def values(self): - if self._tuples is not None: - return self._tuples - values = [] for i in range(self.nlevels): @@ -657,8 +649,12 @@ def values(self): vals = np.array(vals, copy=False) values.append(vals) - self._tuples = lib.fast_zip(values) - return self._tuples + arr = lib.fast_zip(values) + return arr + + @property + def values(self): + return self._values @property def array(self): @@ -737,7 +733,6 @@ def _set_levels( if any(names): self._set_names(names) - self._tuples = None self._reset_cache() def set_levels(self, levels, level=None, inplace=None, verify_integrity=True): @@ -906,7 +901,6 @@ def _set_codes( self._codes = new_codes - self._tuples = None self._reset_cache() def set_codes(self, codes, level=None, inplace=None, verify_integrity=True): diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index b2500efef9e03..1816ddeb14856 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -68,45 +68,54 @@ def test_inplace_mutation_resets_values(): mi1 = MultiIndex(levels=levels, codes=codes) mi2 = MultiIndex(levels=levels2, codes=codes) + + assert "_values" not in mi1._cache + assert "_values" not in mi2._cache + vals = mi1.values.copy() vals2 = mi2.values.copy() - assert mi1._tuples is not None + assert isinstance(mi1._cache["_values"], np.ndarray) # Make sure level setting works new_vals = mi1.set_levels(levels2).values tm.assert_almost_equal(vals2, new_vals) - # Non-inplace doesn't kill _tuples [implementation detail] - tm.assert_almost_equal(mi1._tuples, vals) + # Non-inplace doesn't drop _values from _cache [implementation detail] + tm.assert_almost_equal(mi1._cache["_values"], vals) # ...and values is still same too tm.assert_almost_equal(mi1.values, vals) - # Inplace should kill _tuples + # Inplace should drop _values from _cache with tm.assert_produces_warning(FutureWarning): mi1.set_levels(levels2, inplace=True) + assert "_values" not in mi1._cache tm.assert_almost_equal(mi1.values, vals2) # Make sure label setting works too codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] exp_values = np.empty((6,), dtype=object) exp_values[:] = [(1, "a")] * 6 - # Must be 1d array of tuples assert exp_values.shape == (6,) - new_values = mi2.set_codes(codes2).values + + new_mi = mi2.set_codes(codes2) + assert "_values" not in new_mi._cache + new_values = new_mi.values + assert "_values" in new_mi._cache # Not inplace shouldn't change - tm.assert_almost_equal(mi2._tuples, vals2) + tm.assert_almost_equal(mi2._cache["_values"], vals2) # Should have correct values tm.assert_almost_equal(exp_values, new_values) - # ...and again setting inplace should kill _tuples, etc - with tm.assert_produces_warning(FutureWarning): - mi2.set_codes(codes2, inplace=True) + # ...and again setting inplace should drop _values from _cache, etc + mi2.set_codes(codes2, inplace=True) + assert "_values" not in mi2._cache tm.assert_almost_equal(mi2.values, new_values) + assert "_values" in mi2._cache def test_ndarray_compat_properties(idx, compat_props): From ce1e3f424402b83ccae24b74c93d8afea7bfe234 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 9 Aug 2020 10:12:19 +0100 Subject: [PATCH 2/4] add some tests --- pandas/tests/indexes/multi/test_compat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 1816ddeb14856..1e7a9591ff945 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -69,12 +69,16 @@ def test_inplace_mutation_resets_values(): mi1 = MultiIndex(levels=levels, codes=codes) mi2 = MultiIndex(levels=levels2, codes=codes) + # instantiating MultiIndex should not access/cache _.values assert "_values" not in mi1._cache assert "_values" not in mi2._cache vals = mi1.values.copy() vals2 = mi2.values.copy() + # accessing .values should cache ._values + assert mi1._values is mi1._cache["_values"] + assert mi1.values is mi1._cache["_values"] assert isinstance(mi1._cache["_values"], np.ndarray) # Make sure level setting works @@ -97,6 +101,7 @@ def test_inplace_mutation_resets_values(): codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] exp_values = np.empty((6,), dtype=object) exp_values[:] = [(1, "a")] * 6 + # Must be 1d array of tuples assert exp_values.shape == (6,) From cf57bbc05fe381c87dfe5dd797a2fa650ed3492d Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 Aug 2020 19:51:16 +0100 Subject: [PATCH 3/4] add assertion --- pandas/tests/indexes/multi/test_compat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 1e7a9591ff945..72b5ed0edaa78 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -117,7 +117,8 @@ def test_inplace_mutation_resets_values(): tm.assert_almost_equal(exp_values, new_values) # ...and again setting inplace should drop _values from _cache, etc - mi2.set_codes(codes2, inplace=True) + with tm.assert_produces_warning(FutureWarning): + mi2.set_codes(codes2, inplace=True) assert "_values" not in mi2._cache tm.assert_almost_equal(mi2.values, new_values) assert "_values" in mi2._cache From e8b4e3b629f340c7ca6d3c3d88126a5f7e1d7848 Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 10 Aug 2020 20:25:17 +0100 Subject: [PATCH 4/4] CLN: move read_hdf doc string --- pandas/io/pytables.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index aeb7b3e044794..2abc570a04de3 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -320,6 +320,10 @@ def read_hdf( mode : {'r', 'r+', 'a'}, default 'r' Mode to use when opening the file. Ignored if path_or_buf is a :class:`pandas.HDFStore`. Default is 'r'. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. where : list, optional A list of Term (or convertible) objects. start : int, optional @@ -332,10 +336,6 @@ def read_hdf( Return an iterator object. chunksize : int, optional Number of rows to include in an iteration when using an iterator. - errors : str, default 'strict' - Specifies how encoding and decoding errors are to be handled. - See the errors argument for :func:`open` for a full list - of options. **kwargs Additional keyword arguments passed to HDFStore.