From 71a0c3ee03e3f9aa28be49936ffbed6c3f1afba6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 10:08:09 -0700 Subject: [PATCH 1/7] let checking happen in where --- pandas/core/frame.py | 11 +++++------ pandas/tests/frame/test_indexing.py | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cdbe0e9d22eb4..9fd956c40c1f0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2836,11 +2836,13 @@ def __getitem__(self, key): # Do we have a slicer (on rows)? indexer = convert_to_index_sliceable(self, key) if indexer is not None: + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing return self._slice(indexer, axis=0) # Do we have a (boolean) DataFrame? if isinstance(key, DataFrame): - return self._getitem_frame(key) + return self.where(key) # Do we have a (boolean) 1d indexer? if com.is_bool_indexer(key): @@ -2938,11 +2940,6 @@ def _getitem_multilevel(self, key): else: return self._get_item_cache(key) - def _getitem_frame(self, key): - if key.values.size and not is_bool_dtype(key.values): - raise ValueError("Must pass DataFrame with boolean values only") - return self.where(key) - def _get_value(self, index, col, takeable: bool = False): """ Quickly retrieve single value at passed column and index. @@ -2986,6 +2983,8 @@ def __setitem__(self, key, value): # see if we can slice the rows indexer = convert_to_index_sliceable(self, key) if indexer is not None: + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing return self._setitem_slice(indexer, value) if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 814a99701b703..ae14563e5952a 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -269,7 +269,7 @@ def test_getitem_boolean( subframe_obj = datetime_frame[indexer_obj] assert_frame_equal(subframe_obj, subframe) - with pytest.raises(ValueError, match="boolean values only"): + with pytest.raises(ValueError, match="Boolean array expected"): datetime_frame[datetime_frame] # test that Series work From 6d3d2d5e0cf26bb9612f0b44575e70129f2cfeb7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 10:36:16 -0700 Subject: [PATCH 2/7] cleanup, comments --- pandas/core/indexing.py | 7 ++----- pandas/core/series.py | 27 ++++++++++++--------------- pandas/core/sparse/series.py | 2 +- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a1a8619fab892..df89dbe6db6dc 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -117,6 +117,7 @@ def __iter__(self): raise NotImplementedError("ix is not iterable") def __getitem__(self, key): + # Used in ix and downstream in geopandas _CoordinateIndexer if type(key) is tuple: # Note: we check the type exactly instead of with isinstance # because NamedTuple is checked separately. @@ -181,7 +182,7 @@ def _get_setitem_indexer(self, key): pass if isinstance(key, range): - return self._convert_range(key, is_setter=True) + return list(key) axis = self.axis or 0 try: @@ -258,10 +259,6 @@ def _convert_tuple(self, key): keyidx.append(idx) return tuple(keyidx) - def _convert_range(self, key: range, is_setter: bool = False): - """ convert a range argument """ - return list(key) - def _convert_scalar_indexer(self, key, axis: int): # if we are accessing via lowered dim, use the last dim ax = self.obj._get_axis(min(axis, self.ndim - 1)) diff --git a/pandas/core/series.py b/pandas/core/series.py index c7fcab56e1fe5..6488738e82747 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1131,8 +1131,7 @@ def __getitem__(self, key): def _get_with(self, key): # other: fancy integer or otherwise if isinstance(key, slice): - indexer = self.index._convert_slice_indexer(key, kind="getitem") - return self._get_values(indexer) + return self._slice(key) elif isinstance(key, ABCDataFrame): raise TypeError( "Indexing a Series with DataFrame is not " @@ -1148,7 +1147,6 @@ def _get_with(self, key): return self._get_values(key) raise - # pragma: no cover if not isinstance(key, (list, np.ndarray, Series, Index)): key = list(key) @@ -1165,19 +1163,18 @@ def _get_with(self, key): elif key_type == "boolean": return self._get_values(key) - try: - # handle the dup indexing case (GH 4246) - if isinstance(key, (list, tuple)): - return self.loc[key] - - return self.reindex(key) - except Exception: - # [slice(0, 5, None)] will break if you convert to ndarray, - # e.g. as requested by np.median - # hack - if isinstance(key[0], slice): + if isinstance(key, (list, tuple)): + # TODO: de-dup with tuple case handled above? + # handle the dup indexing case GH#4246 + if len(key) == 1 and isinstance(key[0], slice): + # [slice(0, 5, None)] will break if you convert to ndarray, + # e.g. as requested by np.median + # FIXME: hack return self._get_values(key) - raise + + return self.loc[key] + + return self.reindex(key) def _get_values_tuple(self, key): # mpl hackaround diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index fc51c06b149fd..d81cab77f09f5 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -324,7 +324,7 @@ def _ixs(self, i: int, axis: int = 0): Parameters ---------- i : int - axis: int + axis : int default 0, ignored Returns From 998a6f55fb1df79828a0e8361073fccd3d13beb1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 15:28:40 -0700 Subject: [PATCH 3/7] check earlier --- pandas/core/series.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6488738e82747..74f8d5e263c43 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1217,6 +1217,7 @@ def _get_value(self, label, takeable: bool = False): def __setitem__(self, key, value): key = com.apply_if_callable(key, self) + cacher_needs_updating = self._check_is_chained_assignment_possible() def setitem(key, value): try: @@ -1256,7 +1257,6 @@ def setitem(key, value): self._set_with(key, value) # do the setitem - cacher_needs_updating = self._check_is_chained_assignment_possible() setitem(key, value) if cacher_needs_updating: self._maybe_update_cacher() @@ -1303,6 +1303,7 @@ def _set_with(self, key, value): if isinstance(key, Index): key_type = key.inferred_type + key = key._values else: key_type = lib.infer_dtype(key, skipna=False) @@ -1317,10 +1318,7 @@ def _set_with(self, key, value): self._set_labels(key, value) def _set_labels(self, key, value): - if isinstance(key, Index): - key = key.values - else: - key = com.asarray_tuplesafe(key) + key = com.asarray_tuplesafe(key) indexer = self.index.get_indexer(key) mask = indexer == -1 if mask.any(): From d93810b0c331d531ca5b149c4e456a356d87f02d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 15:48:30 -0700 Subject: [PATCH 4/7] check earlier --- pandas/core/series.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 74f8d5e263c43..ed1ff9f65b386 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1234,9 +1234,9 @@ def setitem(key, value): elif key is Ellipsis: self[:] = value return - - self.loc[key] = value - return + else: + self.loc[key] = value + return except TypeError as e: if isinstance(key, tuple) and not isinstance(self.index, MultiIndex): @@ -1279,6 +1279,14 @@ def _set_with(self, key, value): if isinstance(key, slice): indexer = self.index._convert_slice_indexer(key, kind="getitem") return self._set_values(indexer, value) + + elif is_scalar(key) and not is_integer(key) and key not in self.index: + # GH#12862 adding an new key to the Series + # Note: have to exclude integers because that is ambiguously + # position-based + self.loc[key] = value + return + else: if isinstance(key, tuple): try: @@ -1286,12 +1294,6 @@ def _set_with(self, key, value): except Exception: pass - if is_scalar(key) and not is_integer(key) and key not in self.index: - # GH#12862 adding an new key to the Series - # Note: have to exclude integers because that is ambiguously - # position-based - self.loc[key] = value - return if is_scalar(key): key = [key] From 90fb5f6f882d7da0a1ab4a0b9c40cccf067208ed Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 16:07:29 -0700 Subject: [PATCH 5/7] implement setitem_fallback --- pandas/core/series.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index ed1ff9f65b386..aa07a80579687 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1219,6 +1219,17 @@ def __setitem__(self, key, value): key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible() + def setitem_fallback(key, value): + if com.is_bool_indexer(key): + key = check_bool_indexer(self.index, key) + try: + self._where(~key, value, inplace=True) + return + except InvalidIndexError: + pass + + self._set_with(key, value) + def setitem(key, value): try: self._set_with_engine(key, value) @@ -1246,15 +1257,7 @@ def setitem(key, value): if _is_unorderable_exception(e): raise IndexError(key) - if com.is_bool_indexer(key): - key = check_bool_indexer(self.index, key) - try: - self._where(~key, value, inplace=True) - return - except InvalidIndexError: - pass - - self._set_with(key, value) + setitem_fallback(key, value) # do the setitem setitem(key, value) @@ -1294,7 +1297,6 @@ def _set_with(self, key, value): except Exception: pass - if is_scalar(key): key = [key] elif not isinstance(key, (list, Series, np.ndarray)): From 3c43892244153f047d206ad9ed3ba5426508358d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 16:29:50 -0700 Subject: [PATCH 6/7] less closure --- pandas/core/series.py | 46 ++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index aa07a80579687..c49bb25a8b4a0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1230,37 +1230,29 @@ def setitem_fallback(key, value): self._set_with(key, value) - def setitem(key, value): - try: - self._set_with_engine(key, value) - return - except com.SettingWithCopyError: - raise - except (KeyError, ValueError): - values = self._values - if is_integer(key) and not self.index.inferred_type == "integer": - - values[key] = value - return - elif key is Ellipsis: - self[:] = value - return - else: - self.loc[key] = value - return + try: + self._set_with_engine(key, value) + except com.SettingWithCopyError: + raise + except (KeyError, ValueError): + values = self._values + if is_integer(key) and not self.index.inferred_type == "integer": + values[key] = value + elif key is Ellipsis: + self[:] = value + else: + self.loc[key] = value - except TypeError as e: - if isinstance(key, tuple) and not isinstance(self.index, MultiIndex): - raise ValueError("Can only tuple-index with a MultiIndex") + except TypeError as e: + if isinstance(key, tuple) and not isinstance(self.index, MultiIndex): + raise ValueError("Can only tuple-index with a MultiIndex") - # python 3 type errors should be raised - if _is_unorderable_exception(e): - raise IndexError(key) + # python 3 type errors should be raised + if _is_unorderable_exception(e): + raise IndexError(key) - setitem_fallback(key, value) + setitem_fallback(key, value) - # do the setitem - setitem(key, value) if cacher_needs_updating: self._maybe_update_cacher() From 12ee7ebd69c970cdb43d8352d2dc923280066683 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 26 Jul 2019 17:28:14 -0700 Subject: [PATCH 7/7] remove nested func entirely --- pandas/core/series.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c49bb25a8b4a0..f840b6ce649b8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1219,17 +1219,6 @@ def __setitem__(self, key, value): key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible() - def setitem_fallback(key, value): - if com.is_bool_indexer(key): - key = check_bool_indexer(self.index, key) - try: - self._where(~key, value, inplace=True) - return - except InvalidIndexError: - pass - - self._set_with(key, value) - try: self._set_with_engine(key, value) except com.SettingWithCopyError: @@ -1251,7 +1240,15 @@ def setitem_fallback(key, value): if _is_unorderable_exception(e): raise IndexError(key) - setitem_fallback(key, value) + if com.is_bool_indexer(key): + key = check_bool_indexer(self.index, key) + try: + self._where(~key, value, inplace=True) + return + except InvalidIndexError: + pass + + self._set_with(key, value) if cacher_needs_updating: self._maybe_update_cacher()