From bfd1b383e511fe914eadd97ac8678741483f4d57 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 3 Jul 2019 18:43:09 -0700 Subject: [PATCH 01/15] implement indexers.py --- pandas/core/algorithms.py | 3 +- pandas/core/indexers.py | 151 ++++++++++++++++++++++++++++++++ pandas/core/indexing.py | 121 +------------------------ pandas/core/internals/blocks.py | 37 ++------ 4 files changed, 160 insertions(+), 152 deletions(-) create mode 100644 pandas/core/indexers.py diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4e84d7b26b707..81f213afb6756 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -27,6 +27,7 @@ from pandas.core.dtypes.missing import isna, na_value_for_dtype from pandas.core import common as com +from pandas.core.indexers import validate_indices _shared_docs = {} # type: Dict[str, str] @@ -1524,8 +1525,6 @@ def take(arr, indices, axis=0, allow_fill=False, fill_value=None): ... fill_value=-10) array([ 10, 10, -10]) """ - from pandas.core.indexing import validate_indices - if not is_array_like(arr): arr = np.asarray(arr) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py new file mode 100644 index 0000000000000..f04340109c869 --- /dev/null +++ b/pandas/core/indexers.py @@ -0,0 +1,151 @@ +""" +Low-dependency indexing utilities. +""" +import numpy as np + +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass + + +def is_list_like_indexer(key): + # allow a list_like, but exclude NamedTuples which can be indexers + return is_list_like(key) and not (isinstance(key, tuple) and + type(key) is not tuple) + + +def length_of_indexer(indexer, target=None): + """ + return the length of a single non-tuple indexer which could be a slice + """ + if target is not None and isinstance(indexer, slice): + target_len = len(target) + start = indexer.start + stop = indexer.stop + step = indexer.step + if start is None: + start = 0 + elif start < 0: + start += target_len + if stop is None or stop > target_len: + stop = target_len + elif stop < 0: + stop += target_len + if step is None: + step = 1 + elif step < 0: + step = -step + return (stop - start + step - 1) // step + elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)): + return len(indexer) + elif not is_list_like_indexer(indexer): + return 1 + raise AssertionError("cannot find the length of the indexer") + + +def check_setitem_lengths(indexer, value, values): + """ + Validate that value and indexer are the same length. + + An special-case is allowed for when the indexer is a boolean array + and the number of true values equals the length of ``value``. In + this case, no exception is raised. + + Parameters + ---------- + indexer : sequence + The key for the setitem + value : array-like + The value for the setitem + values : array-like + The values being set into + + Returns + ------- + None + + Raises + ------ + ValueError + When the indexer is an ndarray or list and the lengths don't + match. + """ + # boolean with truth values == len of the value is ok too + if isinstance(indexer, (np.ndarray, list)): + if is_list_like(value) and len(indexer) != len(value): + if not (isinstance(indexer, np.ndarray) and + indexer.dtype == np.bool_ and + len(indexer[indexer]) == len(value)): + raise ValueError("cannot set using a list-like indexer " + "with a different length than the value") + + elif isinstance(indexer, slice): + # slice + if is_list_like(value) and len(values): + if len(value) != length_of_indexer(indexer, values): + raise ValueError("cannot set using a slice indexer with a " + "different length than the value") + + +def is_scalar_indexer(indexer, arr_value): + # return True if we are all scalar indexers + + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 + for idx in indexer) + return False + + +def is_empty_indexer(indexer, arr_value): + # return a boolean if we have an empty indexer + + if is_list_like(indexer) and not len(indexer): + return True + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 + for idx in indexer) + return False + + +def validate_indices(indices, n): + """ + Perform bounds-checking for an indexer. + + -1 is allowed for indicating missing values. + + Parameters + ---------- + indices : ndarray + n : int + length of the array being indexed + + Raises + ------ + ValueError + + Examples + -------- + >>> validate_indices([1, 2], 3) + # OK + >>> validate_indices([1, -2], 3) + ValueError + >>> validate_indices([1, 2, 3], 3) + IndexError + >>> validate_indices([-1, -1], 0) + # OK + >>> validate_indices([0, 1], 0) + IndexError + """ + if len(indices): + min_idx = indices.min() + if min_idx < -1: + msg = ("'indices' contains values less than allowed ({} < {})" + .format(min_idx, -1)) + raise ValueError(msg) + + max_idx = indices.max() + if max_idx >= n: + raise IndexError("indices are out-of-bounds") diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 677aefa15d200..b8e300c9f68ec 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -16,6 +16,7 @@ import pandas.core.common as com from pandas.core.index import Index, MultiIndex +from pandas.core.indexers import is_list_like_indexer, length_of_indexer # the supported indexers @@ -2353,35 +2354,6 @@ def _convert_key(self, key, is_setter=False): return key -def length_of_indexer(indexer, target=None): - """ - return the length of a single non-tuple indexer which could be a slice - """ - if target is not None and isinstance(indexer, slice): - target_len = len(target) - start = indexer.start - stop = indexer.stop - step = indexer.step - if start is None: - start = 0 - elif start < 0: - start += target_len - if stop is None or stop > target_len: - stop = target_len - elif stop < 0: - stop += target_len - if step is None: - step = 1 - elif step < 0: - step = -step - return (stop - start + step - 1) // step - elif isinstance(indexer, (ABCSeries, Index, np.ndarray, list)): - return len(indexer) - elif not is_list_like_indexer(indexer): - return 1 - raise AssertionError("cannot find the length of the indexer") - - def convert_to_index_sliceable(obj, key): """ if we are index sliceable, then return my slicer, otherwise return None @@ -2458,50 +2430,6 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: return result -def check_setitem_lengths(indexer, value, values): - """ - Validate that value and indexer are the same length. - - An special-case is allowed for when the indexer is a boolean array - and the number of true values equals the length of ``value``. In - this case, no exception is raised. - - Parameters - ---------- - indexer : sequence - The key for the setitem - value : array-like - The value for the setitem - values : array-like - The values being set into - - Returns - ------- - None - - Raises - ------ - ValueError - When the indexer is an ndarray or list and the lengths don't - match. - """ - # boolean with truth values == len of the value is ok too - if isinstance(indexer, (np.ndarray, list)): - if is_list_like(value) and len(indexer) != len(value): - if not (isinstance(indexer, np.ndarray) and - indexer.dtype == np.bool_ and - len(indexer[indexer]) == len(value)): - raise ValueError("cannot set using a list-like indexer " - "with a different length than the value") - # slice - elif isinstance(indexer, slice): - - if is_list_like(value) and len(values): - if len(value) != length_of_indexer(indexer, values): - raise ValueError("cannot set using a slice indexer with a " - "different length than the value") - - def convert_missing_indexer(indexer): """ reverse convert a missing indexer, which is a dict @@ -2576,47 +2504,6 @@ def maybe_convert_indices(indices, n): return indices -def validate_indices(indices, n): - """ - Perform bounds-checking for an indexer. - - -1 is allowed for indicating missing values. - - Parameters - ---------- - indices : ndarray - n : int - length of the array being indexed - - Raises - ------ - ValueError - - Examples - -------- - >>> validate_indices([1, 2], 3) - # OK - >>> validate_indices([1, -2], 3) - ValueError - >>> validate_indices([1, 2, 3], 3) - IndexError - >>> validate_indices([-1, -1], 0) - # OK - >>> validate_indices([0, 1], 0) - IndexError - """ - if len(indices): - min_idx = indices.min() - if min_idx < -1: - msg = ("'indices' contains values less than allowed ({} < {})" - .format(min_idx, -1)) - raise ValueError(msg) - - max_idx = indices.max() - if max_idx >= n: - raise IndexError("indices are out-of-bounds") - - def maybe_convert_ix(*args): """ We likely want to take the cross-product @@ -2646,12 +2533,6 @@ def is_nested_tuple(tup, labels): return False -def is_list_like_indexer(key): - # allow a list_like, but exclude NamedTuples which can be indexers - return is_list_like(key) and not (isinstance(key, tuple) and - type(key) is not tuple) - - def is_label_like(key): # select a label or row return not isinstance(key, slice) and not is_list_like_indexer(key) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a9b2c0491458c..71819a9fa3089 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -36,7 +36,8 @@ Categorical, DatetimeArray, ExtensionArray, PandasDtype, TimedeltaArray) from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.core.indexing import check_setitem_lengths +from pandas.core.indexers import ( + check_setitem_lengths, is_empty_indexer, is_scalar_indexer) from pandas.core.internals.arrays import extract_array import pandas.core.missing as missing from pandas.core.nanops import nanpercentile @@ -854,37 +855,13 @@ def setitem(self, indexer, value): # length checking check_setitem_lengths(indexer, value, values) - def _is_scalar_indexer(indexer): - # return True if we are all scalar indexers - - if arr_value.ndim == 1: - if not isinstance(indexer, tuple): - indexer = tuple([indexer]) - return any(isinstance(idx, np.ndarray) and len(idx) == 0 - for idx in indexer) - return False - - def _is_empty_indexer(indexer): - # return a boolean if we have an empty indexer - - if is_list_like(indexer) and not len(indexer): - return True - if arr_value.ndim == 1: - if not isinstance(indexer, tuple): - indexer = tuple([indexer]) - return any(isinstance(idx, np.ndarray) and len(idx) == 0 - for idx in indexer) - return False - - # empty indexers - # 8669 (empty) - if _is_empty_indexer(indexer): + if is_empty_indexer(indexer, arr_value): + # GH#8669 empty indexers pass - # setting a single element for each dim and with a rhs that could - # be say a list - # GH 6043 - elif _is_scalar_indexer(indexer): + elif is_scalar_indexer(indexer): + # setting a single element for each dim and with a rhs that could + # be e.g. a list; see GH#6043 values[indexer] = value # if we are an exact match (ex-broadcasting), From 746a3c005a74834a1c5b0404a5aef068512a53fb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 3 Jul 2019 18:46:13 -0700 Subject: [PATCH 02/15] add types --- pandas/core/indexers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index f04340109c869..9301a1d262da8 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -7,13 +7,13 @@ from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass -def is_list_like_indexer(key): +def is_list_like_indexer(key) -> bool: # allow a list_like, but exclude NamedTuples which can be indexers return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) -def length_of_indexer(indexer, target=None): +def length_of_indexer(indexer, target=None) -> int: """ return the length of a single non-tuple indexer which could be a slice """ @@ -86,7 +86,7 @@ def check_setitem_lengths(indexer, value, values): "different length than the value") -def is_scalar_indexer(indexer, arr_value): +def is_scalar_indexer(indexer, arr_value) -> bool: # return True if we are all scalar indexers if arr_value.ndim == 1: @@ -97,7 +97,7 @@ def is_scalar_indexer(indexer, arr_value): return False -def is_empty_indexer(indexer, arr_value): +def is_empty_indexer(indexer, arr_value) -> bool: # return a boolean if we have an empty indexer if is_list_like(indexer) and not len(indexer): @@ -110,7 +110,7 @@ def is_empty_indexer(indexer, arr_value): return False -def validate_indices(indices, n): +def validate_indices(indices: np.ndarray, n: int): """ Perform bounds-checking for an indexer. From 0233d42c8b714eec4b562429f69bf13108376a8c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 3 Jul 2019 19:02:38 -0700 Subject: [PATCH 03/15] cleanup --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b8e300c9f68ec..5bbdf15890daf 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1244,7 +1244,7 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False, raise except TypeError: pass - except (ValueError): + except ValueError: if not is_int_positional: raise From 35d3055a883f88acf28724e9724f54755c2b52c2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 3 Jul 2019 21:29:50 -0700 Subject: [PATCH 04/15] separate out helper method; add types, remove redundnat checks --- pandas/core/indexing.py | 105 +++++++++++-------------- pandas/core/internals/blocks.py | 2 +- pandas/tests/indexing/test_indexing.py | 4 +- 3 files changed, 47 insertions(+), 64 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5bbdf15890daf..b0198295aa192 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -111,10 +111,12 @@ def __getitem__(self, key): for x in key) try: values = self.obj._get_value(*key) + except (KeyError, TypeError): + # TypeError is hit if key contains non-hashable entries + pass + else: if is_scalar(values): return values - except Exception: - pass return self._getitem_tuple(key) else: @@ -537,24 +539,6 @@ def setter(item, v): # reset the sliced object if unique self.obj[item] = s - def can_do_equal_len(): - """ return True if we have an equal len settable """ - if (not len(labels) == 1 or not np.iterable(value) or - is_scalar(plane_indexer[0])): - return False - - item = labels[0] - index = self.obj[item].index - - values_len = len(value) - # equal len list/ndarray - if len(index) == values_len: - return True - elif lplane_indexer == values_len: - return True - - return False - # we need an iterable, with a ndim of at least 1 # eg. don't pass through np.array(0) if is_list_like_indexer(value) and getattr(value, 'ndim', 1) > 0: @@ -596,7 +580,8 @@ def can_do_equal_len(): setter(item, value[:, i].tolist()) # we have an equal len list/ndarray - elif can_do_equal_len(): + elif _can_do_equal_len(labels, value, plane_indexer, + lplane_indexer, self.obj): setter(labels[0], value) # per label values @@ -1090,7 +1075,7 @@ def _get_listlike_indexer(self, key, axis, raise_missing=False): raise_missing=raise_missing) return keyarr, indexer - def _getitem_iterable(self, key, axis=None): + def _getitem_iterable(self, key, axis: int): """ Index current object with an an iterable key (which can be a boolean indexer, or a collection of keys). @@ -1099,7 +1084,7 @@ def _getitem_iterable(self, key, axis=None): ---------- key : iterable Target labels, or boolean indexer - axis: int, default None + axis: int Dimension on which the indexing is being made Raises @@ -1115,10 +1100,7 @@ def _getitem_iterable(self, key, axis=None): ------- scalar, DataFrame, or Series: indexed value(s), """ - - if axis is None: - axis = self.axis or 0 - + # caller is responsible for ensuring non-None axis self._validate_key(key, axis) labels = self.obj._get_axis(axis) @@ -1295,20 +1277,16 @@ def _tuplify(self, loc): tup[0] = loc return tuple(tup) - def _get_slice_axis(self, slice_obj, axis=None): + def _get_slice_axis(self, slice_obj: slice, axis: int): + # caller is responsible for ensuring non-None axis obj = self.obj - if axis is None: - axis = self.axis or 0 - if not need_slice(slice_obj): return obj.copy(deep=False) - indexer = self._convert_slice_indexer(slice_obj, axis) - if isinstance(indexer, slice): - return self._slice(indexer, axis=axis, kind='iloc') - else: - return self.obj._take(indexer, axis=axis) + indexer = self._convert_slice_indexer(slice_obj, axis) + assert isinstance(indexer, slice), type(indexer) + return self._slice(indexer, axis=axis, kind='iloc') class _IXIndexer(_NDFrameIndexer): @@ -1416,11 +1394,11 @@ def __getitem__(self, key): if type(key) is tuple: key = tuple(com.apply_if_callable(x, self.obj) for x in key) - try: - if self._is_scalar_access(key): + if self._is_scalar_access(key): + try: return self._getitem_scalar(key) - except (KeyError, IndexError, AttributeError): - pass + except (KeyError, IndexError, AttributeError): + pass return self._getitem_tuple(key) else: # we by definition only have the 0th axis @@ -1438,9 +1416,8 @@ def _getitem_scalar(self, key): def _getitem_axis(self, key, axis=None): raise NotImplementedError() - def _getbool_axis(self, key, axis=None): - if axis is None: - axis = self.axis or 0 + def _getbool_axis(self, key, axis: int): + # caller is responsible for ensuring non-None axis labels = self.obj._get_axis(axis) key = check_bool_indexer(labels, key) inds, = key.nonzero() @@ -1449,11 +1426,9 @@ def _getbool_axis(self, key, axis=None): except Exception as detail: raise self._exception(detail) - def _get_slice_axis(self, slice_obj, axis=None): + def _get_slice_axis(self, slice_obj: slice, axis: int): """ this is pretty simple as we just have to deal with labels """ - if axis is None: - axis = self.axis or 0 - + # caller is responsible for ensuring non-None axis obj = self.obj if not need_slice(slice_obj): return obj.copy(deep=False) @@ -1465,6 +1440,8 @@ def _get_slice_axis(self, slice_obj, axis=None): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind='iloc') else: + # DatetimeIndex overrides Index.slice_indexer and may + # return a DatetimeIndex instead of a slice object. return self.obj._take(indexer, axis=axis) @@ -1982,6 +1959,7 @@ class _iLocIndexer(_LocationIndexer): _valid_types = ("integer, integer slice (START point is INCLUDED, END " "point is EXCLUDED), listlike of integers, boolean array") _exception = IndexError + _get_slice_axis = _NDFrameIndexer._get_slice_axis def _validate_key(self, key, axis): if com.is_bool_indexer(key): @@ -2102,20 +2080,6 @@ def _getitem_tuple(self, tup): return retval - def _get_slice_axis(self, slice_obj, axis=None): - if axis is None: - axis = self.axis or 0 - obj = self.obj - - if not need_slice(slice_obj): - return obj.copy(deep=False) - - slice_obj = self._convert_slice_indexer(slice_obj, axis) - if isinstance(slice_obj, slice): - return self._slice(slice_obj, axis=axis, kind='iloc') - else: - return self.obj._take(slice_obj, axis=axis) - def _get_list_axis(self, key, axis=None): """ Return Series values by list or array of integers @@ -2604,3 +2568,22 @@ def _maybe_numeric_slice(df, slice_, include_bool=False): dtypes.append(bool) slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns] return slice_ + + +def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj): + """ return True if we have an equal len settable """ + if (not len(labels) == 1 or not np.iterable(value) or + is_scalar(plane_indexer[0])): + return False + + item = labels[0] + index = obj[item].index + + values_len = len(value) + # equal len list/ndarray + if len(index) == values_len: + return True + elif lplane_indexer == values_len: + return True + + return False diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 71819a9fa3089..e8fc32189f928 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -859,7 +859,7 @@ def setitem(self, indexer, value): # GH#8669 empty indexers pass - elif is_scalar_indexer(indexer): + elif is_scalar_indexer(indexer, arr_value): # setting a single element for each dim and with a rhs that could # be e.g. a list; see GH#6043 values[indexer] = value diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 92966e721aedc..c65ff2ab1d327 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -12,8 +12,8 @@ import pandas as pd from pandas import DataFrame, Index, NaT, Series from pandas.core.generic import NDFrame -from pandas.core.indexing import ( - _maybe_numeric_slice, _non_reducing_slice, validate_indices) +from pandas.core.indexers import validate_indices +from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice from pandas.tests.indexing.common import Base, _mklbl import pandas.util.testing as tm From b1ce472c9c4807ed66d5eab30d7db4cdfeae86e0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 3 Jul 2019 21:35:27 -0700 Subject: [PATCH 05/15] blackify --- pandas/core/indexers.py | 34 ++++++++++++++++++--------------- pandas/core/indexing.py | 8 ++++---- pandas/core/internals/blocks.py | 5 ++++- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 9301a1d262da8..f196f4d883e12 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -9,8 +9,7 @@ def is_list_like_indexer(key) -> bool: # allow a list_like, but exclude NamedTuples which can be indexers - return is_list_like(key) and not (isinstance(key, tuple) and - type(key) is not tuple) + return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) def length_of_indexer(indexer, target=None) -> int: @@ -72,18 +71,24 @@ def check_setitem_lengths(indexer, value, values): # boolean with truth values == len of the value is ok too if isinstance(indexer, (np.ndarray, list)): if is_list_like(value) and len(indexer) != len(value): - if not (isinstance(indexer, np.ndarray) and - indexer.dtype == np.bool_ and - len(indexer[indexer]) == len(value)): - raise ValueError("cannot set using a list-like indexer " - "with a different length than the value") + if not ( + isinstance(indexer, np.ndarray) + and indexer.dtype == np.bool_ + and len(indexer[indexer]) == len(value) + ): + raise ValueError( + "cannot set using a list-like indexer " + "with a different length than the value" + ) elif isinstance(indexer, slice): # slice if is_list_like(value) and len(values): if len(value) != length_of_indexer(indexer, values): - raise ValueError("cannot set using a slice indexer with a " - "different length than the value") + raise ValueError( + "cannot set using a slice indexer with a " + "different length than the value" + ) def is_scalar_indexer(indexer, arr_value) -> bool: @@ -92,8 +97,7 @@ def is_scalar_indexer(indexer, arr_value) -> bool: if arr_value.ndim == 1: if not isinstance(indexer, tuple): indexer = tuple([indexer]) - return any(isinstance(idx, np.ndarray) and len(idx) == 0 - for idx in indexer) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) return False @@ -105,8 +109,7 @@ def is_empty_indexer(indexer, arr_value) -> bool: if arr_value.ndim == 1: if not isinstance(indexer, tuple): indexer = tuple([indexer]) - return any(isinstance(idx, np.ndarray) and len(idx) == 0 - for idx in indexer) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) return False @@ -142,8 +145,9 @@ def validate_indices(indices: np.ndarray, n: int): if len(indices): min_idx = indices.min() if min_idx < -1: - msg = ("'indices' contains values less than allowed ({} < {})" - .format(min_idx, -1)) + msg = "'indices' contains values less than allowed ({} < {})".format( + min_idx, -1 + ) raise ValueError(msg) max_idx = indices.max() diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b109de6673a9a..9ad0ecbf22f8b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -596,8 +596,9 @@ def setter(item, v): setter(item, value[:, i].tolist()) # we have an equal len list/ndarray - elif _can_do_equal_len(labels, value, plane_indexer, - lplane_indexer, self.obj): + elif _can_do_equal_len( + labels, value, plane_indexer, lplane_indexer, self.obj + ): setter(labels[0], value) # per label values @@ -2626,8 +2627,7 @@ def _maybe_numeric_slice(df, slice_, include_bool=False): def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj): """ return True if we have an equal len settable """ - if (not len(labels) == 1 or not np.iterable(value) or - is_scalar(plane_indexer[0])): + if not len(labels) == 1 or not np.iterable(value) or is_scalar(plane_indexer[0]): return False item = labels[0] diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3704201ca4749..9d9a019a7158c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -73,7 +73,10 @@ from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexers import ( - check_setitem_lengths, is_empty_indexer, is_scalar_indexer) + check_setitem_lengths, + is_empty_indexer, + is_scalar_indexer, +) from pandas.core.internals.arrays import extract_array import pandas.core.missing as missing from pandas.core.nanops import nanpercentile From b2a101aea86dd75d60a02175edf9d644879ec4fb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 4 Jul 2019 07:15:49 -0700 Subject: [PATCH 06/15] types --- pandas/core/indexers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index f196f4d883e12..7027a733ae8fb 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -41,7 +41,7 @@ def length_of_indexer(indexer, target=None) -> int: raise AssertionError("cannot find the length of the indexer") -def check_setitem_lengths(indexer, value, values): +def check_setitem_lengths(indexer, value, values) -> None: """ Validate that value and indexer are the same length. @@ -113,7 +113,7 @@ def is_empty_indexer(indexer, arr_value) -> bool: return False -def validate_indices(indices: np.ndarray, n: int): +def validate_indices(indices: np.ndarray, n: int) -> None: """ Perform bounds-checking for an indexer. From 51ceb24c5b03da71d86d49ee930ed49b4a21d9f1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 4 Jul 2019 07:21:52 -0700 Subject: [PATCH 07/15] move indexers --- pandas/core/indexers.py | 46 ++++++++++++++++++++++++++++++- pandas/core/indexes/base.py | 2 +- pandas/core/indexing.py | 44 ----------------------------- pandas/core/internals/managers.py | 2 +- pandas/core/series.py | 3 +- 5 files changed, 49 insertions(+), 48 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 7027a733ae8fb..d4e9f2680e68f 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -4,7 +4,7 @@ import numpy as np from pandas.core.dtypes.common import is_list_like -from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries def is_list_like_indexer(key) -> bool: @@ -153,3 +153,47 @@ def validate_indices(indices: np.ndarray, n: int) -> None: max_idx = indices.max() if max_idx >= n: raise IndexError("indices are out-of-bounds") + + +def maybe_convert_indices(indices, n: int): + """ + Attempt to convert indices into valid, positive indices. + + If we have negative indices, translate to positive here. + If we have indices that are out-of-bounds, raise an IndexError. + + Parameters + ---------- + indices : array-like + The array of indices that we are to convert. + n : int + The number of elements in the array that we are indexing. + + Returns + ------- + valid_indices : array-like + An array-like of positive indices that correspond to the ones + that were passed in initially to this function. + + Raises + ------ + IndexError : one of the converted indices either exceeded the number + of elements (specified by `n`) OR was still negative. + """ + + if isinstance(indices, list): + indices = np.array(indices) + if len(indices) == 0: + # If list is empty, np.array will return float and cause indexing + # errors. + return np.empty(0, dtype=np.intp) + + mask = indices < 0 + if mask.any(): + indices = indices.copy() + indices[mask] += n + + mask = (indices >= n) | (indices < 0) + if mask.any(): + raise IndexError("indices are out-of-bounds") + return indices diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 973a022cfc3f1..34a0acc711e5f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -66,6 +66,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com +from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name, make_invalid_op @@ -3317,7 +3318,6 @@ def _convert_list_indexer(self, keyarr, kind=None): # values outside the range of indices so as to trigger an # IndexError in maybe_convert_indices indexer[indexer < 0] = len(self) - from pandas.core.indexing import maybe_convert_indices return maybe_convert_indices(indexer, len(self)) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9ad0ecbf22f8b..1753d24d23f5c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2475,50 +2475,6 @@ def get_indexer(_i, _idx): return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer)) -def maybe_convert_indices(indices, n): - """ - Attempt to convert indices into valid, positive indices. - - If we have negative indices, translate to positive here. - If we have indices that are out-of-bounds, raise an IndexError. - - Parameters - ---------- - indices : array-like - The array of indices that we are to convert. - n : int - The number of elements in the array that we are indexing. - - Returns - ------- - valid_indices : array-like - An array-like of positive indices that correspond to the ones - that were passed in initially to this function. - - Raises - ------ - IndexError : one of the converted indices either exceeded the number - of elements (specified by `n`) OR was still negative. - """ - - if isinstance(indices, list): - indices = np.array(indices) - if len(indices) == 0: - # If list is empty, np.array will return float and cause indexing - # errors. - return np.empty(0, dtype=np.intp) - - mask = indices < 0 - if mask.any(): - indices = indices.copy() - indices[mask] += n - - mask = (indices >= n) | (indices < 0) - if mask.any(): - raise IndexError("indices are out-of-bounds") - return indices - - def maybe_convert_ix(*args): """ We likely want to take the cross-product diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c5254aaa4af5f..4bf43056f172d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -34,7 +34,7 @@ import pandas.core.algorithms as algos from pandas.core.base import PandasObject from pandas.core.index import Index, MultiIndex, ensure_index -from pandas.core.indexing import maybe_convert_indices +from pandas.core.indexers import maybe_convert_indices from pandas.io.formats.printing import pprint_thing diff --git a/pandas/core/series.py b/pandas/core/series.py index b3a7f38aef8ef..1943b66818b95 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -66,12 +66,13 @@ MultiIndex, ensure_index, ) +from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.accessors import CombinedDatetimelikeProperties import pandas.core.indexes.base as ibase from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import PeriodIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.core.indexing import check_bool_indexer, maybe_convert_indices +from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager from pandas.core.internals.construction import sanitize_array from pandas.core.strings import StringMethods From 7efe2d1bea8590c02a5794a2dd0750521bfee302 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 4 Jul 2019 07:24:59 -0700 Subject: [PATCH 08/15] organize file --- pandas/core/indexers.py | 110 ++++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index d4e9f2680e68f..794869e6ac85a 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -7,39 +7,38 @@ from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries +# ----------------------------------------------------------- +# Indexer Identification + def is_list_like_indexer(key) -> bool: # allow a list_like, but exclude NamedTuples which can be indexers return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) -def length_of_indexer(indexer, target=None) -> int: - """ - return the length of a single non-tuple indexer which could be a slice - """ - if target is not None and isinstance(indexer, slice): - target_len = len(target) - start = indexer.start - stop = indexer.stop - step = indexer.step - if start is None: - start = 0 - elif start < 0: - start += target_len - if stop is None or stop > target_len: - stop = target_len - elif stop < 0: - stop += target_len - if step is None: - step = 1 - elif step < 0: - step = -step - return (stop - start + step - 1) // step - elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)): - return len(indexer) - elif not is_list_like_indexer(indexer): - return 1 - raise AssertionError("cannot find the length of the indexer") +def is_scalar_indexer(indexer, arr_value) -> bool: + # return True if we are all scalar indexers + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) + return False + + +def is_empty_indexer(indexer, arr_value) -> bool: + # return a boolean if we have an empty indexer + + if is_list_like(indexer) and not len(indexer): + return True + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) + return False + + +# ----------------------------------------------------------- +# Indexer Validation def check_setitem_lengths(indexer, value, values) -> None: """ @@ -91,28 +90,6 @@ def check_setitem_lengths(indexer, value, values) -> None: ) -def is_scalar_indexer(indexer, arr_value) -> bool: - # return True if we are all scalar indexers - - if arr_value.ndim == 1: - if not isinstance(indexer, tuple): - indexer = tuple([indexer]) - return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) - return False - - -def is_empty_indexer(indexer, arr_value) -> bool: - # return a boolean if we have an empty indexer - - if is_list_like(indexer) and not len(indexer): - return True - if arr_value.ndim == 1: - if not isinstance(indexer, tuple): - indexer = tuple([indexer]) - return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) - return False - - def validate_indices(indices: np.ndarray, n: int) -> None: """ Perform bounds-checking for an indexer. @@ -155,6 +132,9 @@ def validate_indices(indices: np.ndarray, n: int) -> None: raise IndexError("indices are out-of-bounds") +# ----------------------------------------------------------- +# Indexer Conversion + def maybe_convert_indices(indices, n: int): """ Attempt to convert indices into valid, positive indices. @@ -197,3 +177,35 @@ def maybe_convert_indices(indices, n: int): if mask.any(): raise IndexError("indices are out-of-bounds") return indices + + +# ----------------------------------------------------------- +# Unsorted + +def length_of_indexer(indexer, target=None) -> int: + """ + return the length of a single non-tuple indexer which could be a slice + """ + if target is not None and isinstance(indexer, slice): + target_len = len(target) + start = indexer.start + stop = indexer.stop + step = indexer.step + if start is None: + start = 0 + elif start < 0: + start += target_len + if stop is None or stop > target_len: + stop = target_len + elif stop < 0: + stop += target_len + if step is None: + step = 1 + elif step < 0: + step = -step + return (stop - start + step - 1) // step + elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)): + return len(indexer) + elif not is_list_like_indexer(indexer): + return 1 + raise AssertionError("cannot find the length of the indexer") From 3322f4e85ea66b90f6ddb901dca0b1d73f5e7002 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 6 Jul 2019 07:31:12 -0700 Subject: [PATCH 09/15] isort --- pandas/core/indexers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 794869e6ac85a..81dc914deea49 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -6,7 +6,6 @@ from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries - # ----------------------------------------------------------- # Indexer Identification From 5b6335b689e68895905321794ea3feea40467fc9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 6 Jul 2019 07:34:26 -0700 Subject: [PATCH 10/15] lint+isort compat --- pandas/core/indexers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 81dc914deea49..445c72675c3ac 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -9,6 +9,7 @@ # ----------------------------------------------------------- # Indexer Identification + def is_list_like_indexer(key) -> bool: # allow a list_like, but exclude NamedTuples which can be indexers return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) From 74c3d36a0164190a6bce8f0034005d9d6ac44d1f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 7 Jul 2019 15:08:47 -0700 Subject: [PATCH 11/15] blackify --- pandas/core/indexers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 445c72675c3ac..d4b2c5b917997 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -40,6 +40,7 @@ def is_empty_indexer(indexer, arr_value) -> bool: # ----------------------------------------------------------- # Indexer Validation + def check_setitem_lengths(indexer, value, values) -> None: """ Validate that value and indexer are the same length. @@ -135,6 +136,7 @@ def validate_indices(indices: np.ndarray, n: int) -> None: # ----------------------------------------------------------- # Indexer Conversion + def maybe_convert_indices(indices, n: int): """ Attempt to convert indices into valid, positive indices. @@ -182,6 +184,7 @@ def maybe_convert_indices(indices, n: int): # ----------------------------------------------------------- # Unsorted + def length_of_indexer(indexer, target=None) -> int: """ return the length of a single non-tuple indexer which could be a slice From 0fafcc430b2d1348389b74ab31b40c2c7e6ffe7f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 7 Jul 2019 19:44:12 -0700 Subject: [PATCH 12/15] remove assertion --- pandas/core/indexing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f697ec7b54efa..429d9f166b294 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1316,7 +1316,6 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): return obj.copy(deep=False) indexer = self._convert_slice_indexer(slice_obj, axis) - assert isinstance(indexer, slice), type(indexer) return self._slice(indexer, axis=axis, kind="iloc") From 8b8442c618f9793ab7d4ab226917529573898c05 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 7 Jul 2019 19:45:28 -0700 Subject: [PATCH 13/15] add comment for GH#27259 --- pandas/core/indexing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 429d9f166b294..ae066a41d1474 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -115,7 +115,7 @@ def __iter__(self): raise NotImplementedError("ix is not iterable") def __getitem__(self, key): - if type(key) is tuple: + if isinstance(key, tuple): key = tuple(com.apply_if_callable(x, self.obj) for x in key) try: values = self.obj._get_value(*key) @@ -124,6 +124,8 @@ def __getitem__(self, key): # generally slice or list. # TODO(ix): most/all of the TypeError cases here are for ix, # so this check can be removed once ix is removed. + # Note: InvalidIndexError is here for geopandas compat, + # see GH#27259 pass else: if is_scalar(values): @@ -1422,7 +1424,7 @@ class _LocationIndexer(_NDFrameIndexer): _exception = Exception def __getitem__(self, key): - if type(key) is tuple: + if isinstance(key, tuple): key = tuple(com.apply_if_callable(x, self.obj) for x in key) if self._is_scalar_access(key): try: From d4780373e386d9b7dccef6108e8c2ec606d02017 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 7 Jul 2019 19:50:23 -0700 Subject: [PATCH 14/15] docstring --- pandas/core/indexers.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index d4b2c5b917997..7b0030b91e4dc 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -11,6 +11,17 @@ def is_list_like_indexer(key) -> bool: + """ + Check if we have a list-like indexer that is *not* a NamedTuple. + + Parameters + ---------- + key : object + + Returns + ------- + bool + """ # allow a list_like, but exclude NamedTuples which can be indexers return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) From 39fa0141fb2a695fb47bac4a4895e3c6c8de41f5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 06:51:00 -0700 Subject: [PATCH 15/15] revert isinstance to type checks --- pandas/core/indexing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ef9d44e7787f9..f132e4ddbd0c0 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -115,7 +115,9 @@ def __iter__(self): raise NotImplementedError("ix is not iterable") def __getitem__(self, key): - if isinstance(key, tuple): + if type(key) is tuple: + # Note: we check the type exactly instead of with isinstance + # because NamedTuple is checked separately. key = tuple(com.apply_if_callable(x, self.obj) for x in key) try: values = self.obj._get_value(*key) @@ -1425,7 +1427,7 @@ class _LocationIndexer(_NDFrameIndexer): _exception = Exception def __getitem__(self, key): - if isinstance(key, tuple): + if type(key) is tuple: key = tuple(com.apply_if_callable(x, self.obj) for x in key) if self._is_scalar_access(key): try: