-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
PERF: don't call RangeIndex._data unnecessarily #26565
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,8 @@ | |
from pandas.core.indexes.base import Index, _index_shared_docs | ||
from pandas.core.indexes.numeric import Int64Index | ||
|
||
from pandas.io.formats.printing import pprint_thing | ||
|
||
|
||
class RangeIndex(Int64Index): | ||
""" | ||
|
@@ -64,6 +66,8 @@ class RangeIndex(Int64Index): | |
_typ = 'rangeindex' | ||
_engine_type = libindex.Int64Engine | ||
|
||
# check whether self._data has been called | ||
_cached_data = None # type: np.ndarray | ||
# -------------------------------------------------------------------- | ||
# Constructors | ||
|
||
|
@@ -164,6 +168,8 @@ def _simple_new(cls, start, stop=None, step=None, name=None, | |
for k, v in kwargs.items(): | ||
setattr(result, k, v) | ||
|
||
result._range = range(result._start, result._stop, result._step) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could actually remove the _start, _stop, _step properties as well? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I'm planning to do that in an upcoming PR. Python3's |
||
|
||
result._reset_identity() | ||
return result | ||
|
||
|
@@ -180,9 +186,19 @@ def _constructor(self): | |
""" return the class to use for construction """ | ||
return Int64Index | ||
|
||
@cache_readonly | ||
@property | ||
def _data(self): | ||
return np.arange(self._start, self._stop, self._step, dtype=np.int64) | ||
""" | ||
An int array that for performance reasons is created only when needed. | ||
|
||
The constructed array is saved in ``_cached_data``. This allows us to | ||
check if the array has been created without accessing ``_data`` and | ||
triggering the construction. | ||
""" | ||
if self._cached_data is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you give this a docstring (e.g. that cached_data is actually an int array and is constructed only if necessary, for performance reasons)? |
||
self._cached_data = np.arange(self._start, self._stop, self._step, | ||
dtype=np.int64) | ||
return self._cached_data | ||
|
||
@cache_readonly | ||
def _int64index(self): | ||
|
@@ -215,6 +231,9 @@ def _format_data(self, name=None): | |
# we are formatting thru the attributes | ||
return None | ||
|
||
def _format_with_header(self, header, na_rep='NaN', **kwargs): | ||
return header + list(map(pprint_thing, self._range)) | ||
|
||
# -------------------------------------------------------------------- | ||
@property | ||
def start(self): | ||
|
@@ -296,6 +315,15 @@ def is_monotonic_decreasing(self): | |
def has_duplicates(self): | ||
return False | ||
|
||
@Appender(_index_shared_docs['get_loc']) | ||
def get_loc(self, key, method=None, tolerance=None): | ||
if is_integer(key) and method is None and tolerance is None: | ||
try: | ||
return self._range.index(key) | ||
except ValueError: | ||
raise KeyError(key) | ||
return super().get_loc(key, method=method, tolerance=tolerance) | ||
|
||
def tolist(self): | ||
return list(range(self._start, self._stop, self._step)) | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.