diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index d941ef20dc7ac..f3e7ebbbd33e8 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -287,3 +287,19 @@ def setup(self): def time_subset(self): self.p.ix[(self.inds, self.inds, self.inds)] + + +class IndexerLookup(object): + goal_time = 0.2 + + def setup(self): + self.s = Series(range(10)) + + def time_lookup_iloc(self): + self.s.iloc + + def time_lookup_ix(self): + self.s.ix + + def time_lookup_loc(self): + self.s.loc diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 53b052a955b45..847ae4f0fbf6b 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -67,7 +67,7 @@ Removal of prior version deprecations/changes Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- +- Indexers on Series or DataFrame no longer create a reference cycle (:issue:`17956`) - - diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx new file mode 100644 index 0000000000000..fb707a3c3e5e2 --- /dev/null +++ b/pandas/_libs/indexing.pyx @@ -0,0 +1,22 @@ +# cython: profile=False + +cdef class _NDFrameIndexerBase: + ''' + A base class for _NDFrameIndexer for fast instantiation and attribute + access. + ''' + cdef public object obj, name, _ndim + + def __init__(self, name, obj): + self.obj = obj + self.name = name + self._ndim = None + + @property + def ndim(self): + # Delay `ndim` instantiation until required as reading it + # from `obj` isn't entirely cheap. + ndim = self._ndim + if ndim is None: + ndim = self._ndim = self.obj.ndim + return ndim diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 118e7d5cd437b..10a1d922e0457 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,5 +1,6 @@ # pylint: disable=W0231,E1101 import collections +import functools import warnings import operator import weakref @@ -1796,23 +1797,10 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, @classmethod def _create_indexer(cls, name, indexer): """Create an indexer like _name in the class.""" - if getattr(cls, name, None) is None: - iname = '_%s' % name - setattr(cls, iname, None) - - def _indexer(self): - i = getattr(self, iname) - if i is None: - i = indexer(self, name) - setattr(self, iname, i) - return i - + _indexer = functools.partial(indexer, name) setattr(cls, name, property(_indexer, doc=indexer.__doc__)) - # add to our internal names set - cls._internal_names_set.add(iname) - def get(self, key, default=None): """ Get item from object for given key (DataFrame column, Panel slice, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 654c3510b7cf7..b2720078635a4 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -23,6 +23,7 @@ from pandas.core.common import (is_bool_indexer, _asarray_tuplesafe, is_null_slice, is_full_slice, _values_from_object) +from pandas._libs.indexing import _NDFrameIndexerBase # the supported indexers @@ -85,19 +86,14 @@ class IndexingError(Exception): pass -class _NDFrameIndexer(object): +class _NDFrameIndexer(_NDFrameIndexerBase): _valid_types = None _exception = KeyError axis = None - def __init__(self, obj, name): - self.obj = obj - self.ndim = obj.ndim - self.name = name - def __call__(self, axis=None): # we need to return a copy of ourselves - new_self = self.__class__(self.obj, self.name) + new_self = self.__class__(self.name, self.obj) if axis is not None: axis = self.obj._get_axis_number(axis) @@ -1321,7 +1317,7 @@ class _IXIndexer(_NDFrameIndexer): """ - def __init__(self, obj, name): + def __init__(self, name, obj): _ix_deprecation_warning = textwrap.dedent(""" .ix is deprecated. Please use @@ -1332,8 +1328,8 @@ def __init__(self, obj, name): http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""") # noqa warnings.warn(_ix_deprecation_warning, - DeprecationWarning, stacklevel=3) - super(_IXIndexer, self).__init__(obj, name) + DeprecationWarning, stacklevel=2) + super(_IXIndexer, self).__init__(name, obj) def _has_valid_type(self, key, axis): if isinstance(key, slice): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d64ed98243d72..ec62023e75db4 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -5,6 +5,7 @@ import pytest +import weakref from warnings import catch_warnings from datetime import datetime @@ -881,6 +882,14 @@ def test_partial_boolean_frame_indexing(self): columns=list('ABC')) tm.assert_frame_equal(result, expected) + def test_no_reference_cycle(self): + df = pd.DataFrame({'a': [0, 1], 'b': [2, 3]}) + for name in ('loc', 'iloc', 'ix', 'at', 'iat'): + getattr(df, name) + wr = weakref.ref(df) + del df + assert wr() is None + class TestSeriesNoneCoercion(object): EXPECTED_RESULTS = [ diff --git a/setup.py b/setup.py index 158ee9493b6ac..2b4cc0ce019e2 100755 --- a/setup.py +++ b/setup.py @@ -335,6 +335,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/index.pyx', 'pandas/_libs/algos.pyx', 'pandas/_libs/join.pyx', + 'pandas/_libs/indexing.pyx', 'pandas/_libs/interval.pyx', 'pandas/_libs/hashing.pyx', 'pandas/_libs/testing.pyx', @@ -519,6 +520,7 @@ def pxd(name): 'depends': _pxi_dep['join']}, '_libs.reshape': {'pyxfile': '_libs/reshape', 'depends': _pxi_dep['reshape']}, + '_libs.indexing': {'pyxfile': '_libs/indexing'}, '_libs.interval': {'pyxfile': '_libs/interval', 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']},