diff --git a/doc/source/release.rst b/doc/source/release.rst index 798adf5504f1e..aa852c4416ade 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -183,6 +183,7 @@ Bug Fixes - Bug in ``io.data.DataReader`` when passed ``"F-F_Momentum_Factor"`` and ``data_source="famafrench"`` (:issue:`6460`) - Bug in ``sum`` of a ``timedelta64[ns]`` series (:issue:`6462`) - Bug in ``resample`` with a timezone and certain offsets (:issue:`6397`) +- Bug in ``iat/iloc`` with duplicate indices on a Series (:issue:`6493`) pandas 0.13.1 ------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fad348aed0c7d..e8d106dcfda77 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1519,7 +1519,7 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover #---------------------------------------------------------------------- # Getting and setting elements - def get_value(self, index, col): + def get_value(self, index, col, takeable=False): """ Quickly retrieve single value at passed column and index @@ -1527,16 +1527,22 @@ def get_value(self, index, col): ---------- index : row label col : column label + takeable : interpret the index/col as indexers, default False Returns ------- value : scalar value """ + + if takeable is True: + series = self._iget_item_cache(col) + return series.values[index] + series = self._get_item_cache(col) engine = self.index._engine return engine.get_value(series.values, index) - def set_value(self, index, col, value): + def set_value(self, index, col, value, takeable=False): """ Put single value at passed column and index @@ -1545,6 +1551,7 @@ def set_value(self, index, col, value): index : row label col : column label value : scalar value + takeable : interpret the index/col as indexers, default False Returns ------- @@ -1553,6 +1560,10 @@ def set_value(self, index, col, value): otherwise a new object """ try: + if takeable is True: + series = self._iget_item_cache(col) + return series.set_value(index, value, takeable=True) + series = self._get_item_cache(col) engine = self.index._engine engine.set_value(series.values, index, value) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 811604432a018..b7bcd5578bdbf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1004,6 +1004,7 @@ def __getitem__(self, item): return self._get_item_cache(item) def _get_item_cache(self, item): + """ return the cached item, item represents a label indexer """ cache = self._item_cache res = cache.get(item) if res is None: @@ -1021,6 +1022,15 @@ def _set_as_cached(self, item, cacher): a weakref to cacher """ self._cacher = (item, weakref.ref(cacher)) + def _iget_item_cache(self, item): + """ return the cached item, item represents a positional indexer """ + ax = self._info_axis + if ax.is_unique: + lower = self._get_item_cache(ax[item]) + else: + lower = self.take(item, axis=self._info_axis_number, convert=True) + return lower + def _box_item_values(self, key, values): raise NotImplementedError @@ -1595,7 +1605,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy, obj = obj._reindex_with_indexers( {axis: [new_index, indexer]}, method=method, - fill_value=fill_value, limit=limit, copy=copy) + fill_value=fill_value, limit=limit, copy=copy, + allow_dups=takeable) return obj diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 40c6091df64ab..662213c447688 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1419,7 +1419,7 @@ def __getitem__(self, key): raise ValueError('Invalid call for scalar access (getting)!') key = self._convert_key(key) - return self.obj.get_value(*key) + return self.obj.get_value(*key, takeable=self._takeable) def __setitem__(self, key, value): if not isinstance(key, tuple): @@ -1427,33 +1427,32 @@ def __setitem__(self, key, value): if len(key) != self.obj.ndim: raise ValueError('Not enough indexers for scalar access ' '(setting)!') - key = self._convert_key(key) + key = list(self._convert_key(key)) key.append(value) - self.obj.set_value(*key) + self.obj.set_value(*key, takeable=self._takeable) class _AtIndexer(_ScalarAccessIndexer): """ label based scalar accessor """ - pass + _takeable = False class _iAtIndexer(_ScalarAccessIndexer): """ integer based scalar accessor """ + _takeable = True def _has_valid_setitem_indexer(self, indexer): self._has_valid_positional_setitem_indexer(indexer) def _convert_key(self, key): """ require integer args (and convert to label arguments) """ - ckey = [] for a, i in zip(self.obj.axes, key): if not com.is_integer(i): raise ValueError("iAt based indexing can only have integer " "indexers") - ckey.append(a[i]) - return ckey + return key # 32-bit floating point machine epsilon _eps = np.finfo('f4').eps diff --git a/pandas/core/panel.py b/pandas/core/panel.py index cb149abb7c9cf..eba526f574375 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -444,7 +444,7 @@ def as_matrix(self): #---------------------------------------------------------------------- # Getting and setting elements - def get_value(self, *args): + def get_value(self, *args, **kwargs): """ Quickly retrieve single value at (item, major, minor) location @@ -453,6 +453,7 @@ def get_value(self, *args): item : item label (panel item) major : major axis label (panel item row) minor : minor axis label (panel item column) + takeable : interpret the passed labels as indexers, default False Returns ------- @@ -466,12 +467,16 @@ def get_value(self, *args): raise TypeError('There must be an argument for each axis, you gave' ' {0} args, but {1} are required'.format(nargs, nreq)) + takeable = kwargs.get('takeable') - # hm, two layers to the onion - frame = self._get_item_cache(args[0]) - return frame.get_value(*args[1:]) + if takeable is True: + lower = self._iget_item_cache(args[0]) + else: + lower = self._get_item_cache(args[0]) + + return lower.get_value(*args[1:], takeable=takeable) - def set_value(self, *args): + def set_value(self, *args, **kwargs): """ Quickly set single value at (item, major, minor) location @@ -481,6 +486,7 @@ def set_value(self, *args): major : major axis label (panel item row) minor : minor axis label (panel item column) value : scalar + takeable : interpret the passed labels as indexers, default False Returns ------- @@ -496,10 +502,15 @@ def set_value(self, *args): raise TypeError('There must be an argument for each axis plus the ' 'value provided, you gave {0} args, but {1} are ' 'required'.format(nargs, nreq)) + takeable = kwargs.get('takeable') try: - frame = self._get_item_cache(args[0]) - frame.set_value(*args[1:]) + if takeable is True: + lower = self._iget_item_cache(args[0]) + else: + lower = self._get_item_cache(args[0]) + + lower.set_value(*args[1:], takeable=takeable) return self except KeyError: axes = self._expand_axes(args) diff --git a/pandas/core/series.py b/pandas/core/series.py index cd5b8ed5e4efd..3d5c97e8a5ac4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -725,21 +725,24 @@ def reshape(self, *args, **kwargs): iget = _ixs irow = _ixs - def get_value(self, label): + def get_value(self, label, takeable=False): """ Quickly retrieve single value at passed index label Parameters ---------- index : label + takeable : interpret the index as indexers, default False Returns ------- value : scalar value """ + if takeable is True: + return self.values[label] return self.index.get_value(self.values, label) - def set_value(self, label, value): + def set_value(self, label, value, takeable=False): """ Quickly set single value at passed label. If label is not contained, a new object is created with the label placed at the end of the result @@ -751,6 +754,7 @@ def set_value(self, label, value): Partial indexing with MultiIndex not allowed value : object Scalar value + takeable : interpret the index as indexers, default False Returns ------- @@ -759,7 +763,10 @@ def set_value(self, label, value): otherwise a new object """ try: - self.index._engine.set_value(self.values, label, value) + if takeable: + self.values[label] = value + else: + self.index._engine.set_value(self.values, label, value) return self except KeyError: diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index deb9065a2b5a6..6e76155619c09 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -346,10 +346,15 @@ def __getitem__(self, key): return self._get_item_cache(key) @Appender(DataFrame.get_value.__doc__, indents=0) - def get_value(self, index, col): - return self._get_item_cache(col).get_value(index) + def get_value(self, index, col, takeable=False): + if takeable is True: + series = self._iget_item_cache(col) + else: + series = self._get_item_cache(col) + + return series.get_value(index, takeable=takeable) - def set_value(self, index, col, value): + def set_value(self, index, col, value, takeable=False): """ Put single value at passed column and index @@ -358,6 +363,7 @@ def set_value(self, index, col, value): index : row label col : column label value : scalar value + takeable : interpret the index/col as indexers, default False Notes ----- @@ -369,7 +375,7 @@ def set_value(self, index, col, value): ------- frame : DataFrame """ - dense = self.to_dense().set_value(index, col, value) + dense = self.to_dense().set_value(index, col, value, takeable=takeable) return dense.to_sparse(kind=self._default_kind, fill_value=self._default_fill_value) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index cf4060fa6d871..1c599653f9fc5 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -409,22 +409,23 @@ def get(self, label, default=None): else: return default - def get_value(self, label): + def get_value(self, label, takeable=False): """ Retrieve single value at passed index label Parameters ---------- index : label + takeable : interpret the index as indexers, default False Returns ------- value : scalar value """ - loc = self.index.get_loc(label) + loc = label if takeable is True else self.index.get_loc(label) return self._get_val_at(loc) - def set_value(self, label, value): + def set_value(self, label, value, takeable=False): """ Quickly set single value at passed label. If label is not contained, a new object is created with the label placed at the end of the result @@ -436,6 +437,7 @@ def set_value(self, label, value): Partial indexing with MultiIndex not allowed value : object Scalar value + takeable : interpret the index as indexers, default False Notes ----- @@ -450,7 +452,7 @@ def set_value(self, label, value): # if the label doesn't exist, we will create a new object here # and possibily change the index - new_values = values.set_value(label, value) + new_values = values.set_value(label, value, takeable=takeable) if new_values is not None: values = new_values new_index = values.index diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 3111309acff48..9c228c8654c44 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -334,6 +334,33 @@ def test_at_timestamp(self): def test_iat_invalid_args(self): pass + def test_imethods_with_dups(self): + + # GH6493 + # iat/iloc with dups + + s = Series(range(5), index=[1,1,2,2,3]) + result = s.iloc[2] + self.assertEqual(result,2) + result = s.iat[2] + self.assertEqual(result,2) + + self.assertRaises(IndexError, lambda : s.iat[10]) + self.assertRaises(IndexError, lambda : s.iat[-10]) + + result = s.iloc[[2,3]] + expected = Series([2,3],[2,2],dtype='int64') + assert_series_equal(result,expected) + + df = s.to_frame() + result = df.iloc[2] + expected = Series(2,index=[0]) + assert_series_equal(result,expected) + + result = df.iat[2,0] + expected = 2 + self.assertEqual(result,2) + def test_repeated_getitem_dups(self): # GH 5678 # repeated gettitems on a dup index returing a ndarray