From a28040d9aec155ee7f16c25c9ab68c54b7f18c61 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 14 Feb 2018 22:30:10 -0800 Subject: [PATCH 1/2] imlpement maybe_cache, test --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/properties.pyx | 68 ++++++++++++++++++++++++++++++++- pandas/core/base.py | 4 +- pandas/core/generic.py | 1 + pandas/core/indexes/base.py | 3 +- pandas/tests/test_lib.py | 20 ++++++++++ pandas/util/_decorators.py | 2 +- 7 files changed, 93 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 932618ba1df21..2a0e6f9ad4702 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -848,3 +848,4 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) +- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`) diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 4beb24f07c21c..9be89ae180b3b 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -9,12 +9,13 @@ from cpython cimport ( cdef class cache_readonly(object): cdef readonly: - object func, name, allow_setting + object func, name, allow_setting, __doc__ def __init__(self, func=None, allow_setting=False): if func is not None: self.func = func self.name = func.__name__ + self.__doc__ = func.__doc__ self.allow_setting = allow_setting def __call__(self, func, doc=None): @@ -23,8 +24,11 @@ cdef class cache_readonly(object): return self def __get__(self, obj, typ): - # Get the cache or set a default one if needed + if obj is None: + # accessed on the class, not the instance + return self + # Get the cache or set a default one if needed cache = getattr(obj, '_cache', None) if cache is None: try: @@ -55,6 +59,66 @@ cdef class cache_readonly(object): PyDict_SetItem(cache, self.name, value) + +cdef class maybe_cache(object): + """ + A configurable property-like class that acts like a cache_readonly + if an "_immutable" flag is True and a like property otherwise + """ + + cdef readonly: + object func, name, __doc__ + + def __init__(self, func=None): + if func is not None: + self.func = func + self.name = func.__name__ + self.__doc__ = func.__doc__ + + def __call__(self, func, doc=None): + self.func = func + self.name = func.__name__ + if doc is not None: + self.__doc__ = doc + else: + self.__doc__ = func.__doc__ + return self + + def __get__(self, obj, typ): + cdef: + bint immutable + + if obj is None: + # accessed on the class, not the instance + return self + + # Default to non-caching + immutable = getattr(typ, '_immutable', False) + if not immutable: + # behave like a property + val = self.func(obj) + return val + + # Get the cache or set a default one if needed + cache = getattr(obj, '_cache', None) + if cache is None: + try: + cache = obj._cache = {} + except AttributeError: + return + + if PyDict_Contains(cache, self.name): + # not necessary to Py_INCREF + val = PyDict_GetItem(cache, self.name) + else: + val = self.func(obj) + PyDict_SetItem(cache, self.name, val) + return val + + def __set__(self, obj, value): + raise Exception("cannot set values for [%s]" % self.name) + + cdef class AxisProperty(object): cdef: Py_ssize_t axis diff --git a/pandas/core/base.py b/pandas/core/base.py index 0ca029ffd4c25..972bf2d05a01f 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -23,7 +23,7 @@ import pandas._libs.lib as lib from pandas.compat.numpy import function as nv from pandas.compat import PYPY -from pandas.util._decorators import (Appender, cache_readonly, +from pandas.util._decorators import (Appender, cache_readonly, maybe_cache, deprecate_kwarg, Substitution) from pandas.core.accessor import DirNamesMixin @@ -842,7 +842,7 @@ def __iter__(self): """ return iter(self.tolist()) - @cache_readonly + @maybe_cache def hasnans(self): """ return if I have any nans; enables various perf speedups """ return isna(self).any() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 297450417e3cf..8965ed8ebf543 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -119,6 +119,7 @@ class NDFrame(PandasObject, SelectionMixin): 'consolidate', 'convert_objects', 'is_copy']) _metadata = [] _is_copy = None + _immutable = False def __init__(self, data, axes=None, copy=False, dtype=None, fastpath=False): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index be7c1624936bf..72401849cf624 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -48,7 +48,7 @@ import pandas.core.common as com import pandas.core.base as base from pandas.util._decorators import ( - Appender, Substitution, cache_readonly, deprecate_kwarg) + Appender, Substitution, cache_readonly, maybe_cache, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing @@ -177,6 +177,7 @@ class Index(IndexOpsMixin, PandasObject): _attributes = ['name'] _is_numeric_dtype = False _can_hold_na = True + _immutable = True # would we like our indexing holder to defer to us _defer_to_indexing = False diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 502f0c3bced61..b380405f69ac9 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -5,6 +5,7 @@ import numpy as np from pandas._libs import lib, writers as libwriters import pandas.util.testing as tm +import pandas as pd class TestMisc(object): @@ -198,3 +199,22 @@ def test_get_reverse_indexer(self): result = lib.get_reverse_indexer(indexer, 5) expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) + + +def test_maybe_cache(): + # GH#19700 + idx = pd.Index([0, 1]) + assert not idx.hasnans + assert 'hasnans' in idx._cache + ser = idx.to_series() + assert not ser.hasnans + assert not hasattr(ser, '_cache') + ser.iloc[-1] = np.nan + assert ser.hasnans + + +def test_cache_decorators_preserve_docstrings(): + # GH#19700 accessing class attributes that are cache_readonly or + # maybe_cache should a) not return None and b) reflect original docstrings + assert "perf" in pd.Index.hasnans.__doc__ # uses maybe_cache + assert "dtype str " in pd.Index.dtype_str.__doc__ # uses cache_readonly diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index eed9cee54efb3..144ea2509ad54 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -1,5 +1,5 @@ from pandas.compat import callable, signature, PY2 -from pandas._libs.properties import cache_readonly # noqa +from pandas._libs.properties import cache_readonly, maybe_cache # noqa import inspect import types import warnings From c82b30c37be0082ea62238368a610a17afac0685 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 15 Feb 2018 09:48:53 -0800 Subject: [PATCH 2/2] revert maybe_cache, test caching and docstrings --- pandas/_libs/properties.pyx | 4 ++++ pandas/core/base.py | 4 ++-- pandas/core/generic.py | 1 - pandas/core/indexes/base.py | 3 +-- pandas/core/series.py | 2 ++ pandas/tests/series/test_internals.py | 14 ++++++++++++++ pandas/tests/test_lib.py | 22 +++++----------------- pandas/util/_decorators.py | 2 +- 8 files changed, 29 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 9be89ae180b3b..ef1b4654209cd 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -21,6 +21,10 @@ cdef class cache_readonly(object): def __call__(self, func, doc=None): self.func = func self.name = func.__name__ + if doc is not None: + self.__doc__ = doc + else: + self.__doc__ = func.__doc__ return self def __get__(self, obj, typ): diff --git a/pandas/core/base.py b/pandas/core/base.py index 972bf2d05a01f..0ca029ffd4c25 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -23,7 +23,7 @@ import pandas._libs.lib as lib from pandas.compat.numpy import function as nv from pandas.compat import PYPY -from pandas.util._decorators import (Appender, cache_readonly, maybe_cache, +from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.accessor import DirNamesMixin @@ -842,7 +842,7 @@ def __iter__(self): """ return iter(self.tolist()) - @maybe_cache + @cache_readonly def hasnans(self): """ return if I have any nans; enables various perf speedups """ return isna(self).any() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8965ed8ebf543..297450417e3cf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -119,7 +119,6 @@ class NDFrame(PandasObject, SelectionMixin): 'consolidate', 'convert_objects', 'is_copy']) _metadata = [] _is_copy = None - _immutable = False def __init__(self, data, axes=None, copy=False, dtype=None, fastpath=False): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 72401849cf624..be7c1624936bf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -48,7 +48,7 @@ import pandas.core.common as com import pandas.core.base as base from pandas.util._decorators import ( - Appender, Substitution, cache_readonly, maybe_cache, deprecate_kwarg) + Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing @@ -177,7 +177,6 @@ class Index(IndexOpsMixin, PandasObject): _attributes = ['name'] _is_numeric_dtype = False _can_hold_na = True - _immutable = True # would we like our indexing holder to defer to us _defer_to_indexing = False diff --git a/pandas/core/series.py b/pandas/core/series.py index 655eaa5373f5a..bb94a637ae7db 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -145,6 +145,8 @@ class Series(base.IndexOpsMixin, generic.NDFrame): ['asobject', 'sortlevel', 'reshape', 'get_value', 'set_value', 'from_csv', 'valid']) + hasnans = property(base.IndexOpsMixin.hasnans.func) + def __init__(self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False): diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 79e23459ac992..fff43c8a7cd7e 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -8,6 +8,7 @@ from numpy import nan import numpy as np +import pandas as pd from pandas import Series from pandas.core.indexes.datetimes import Timestamp import pandas._libs.lib as lib @@ -309,3 +310,16 @@ def test_convert_preserve_all_bool(self): r = s._convert(datetime=True, numeric=True) e = Series([False, True, False, False], dtype=bool) tm.assert_series_equal(r, e) + + +def test_hasnans_unchached_for_series(): + # GH#19700 + idx = pd.Index([0, 1]) + assert not idx.hasnans + assert 'hasnans' in idx._cache + ser = idx.to_series() + assert not ser.hasnans + assert not hasattr(ser, '_cache') + ser.iloc[-1] = np.nan + assert ser.hasnans + assert 'enables' in pd.Series.hasnans.__doc__ diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index b380405f69ac9..919262be6dbaf 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -201,20 +201,8 @@ def test_get_reverse_indexer(self): tm.assert_numpy_array_equal(result, expected) -def test_maybe_cache(): - # GH#19700 - idx = pd.Index([0, 1]) - assert not idx.hasnans - assert 'hasnans' in idx._cache - ser = idx.to_series() - assert not ser.hasnans - assert not hasattr(ser, '_cache') - ser.iloc[-1] = np.nan - assert ser.hasnans - - -def test_cache_decorators_preserve_docstrings(): - # GH#19700 accessing class attributes that are cache_readonly or - # maybe_cache should a) not return None and b) reflect original docstrings - assert "perf" in pd.Index.hasnans.__doc__ # uses maybe_cache - assert "dtype str " in pd.Index.dtype_str.__doc__ # uses cache_readonly +def test_cache_readonly_preserve_docstrings(): + # GH#19700 accessing class attributes that are cache_readonly + # should a) not return None and b) reflect original docstrings + assert "perf" in pd.Index.hasnans.__doc__ + assert "dtype str " in pd.Index.dtype_str.__doc__ diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 144ea2509ad54..eed9cee54efb3 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -1,5 +1,5 @@ from pandas.compat import callable, signature, PY2 -from pandas._libs.properties import cache_readonly, maybe_cache # noqa +from pandas._libs.properties import cache_readonly # noqa import inspect import types import warnings