Skip to content

BUG: Bug in iat/iloc with duplicate indices on a Series (6493) #6495

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 27, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ Bug Fixes
- Bug in ``io.data.DataReader`` when passed ``"F-F_Momentum_Factor"`` and ``data_source="famafrench"`` (:issue:`6460`)
- Bug in ``sum`` of a ``timedelta64[ns]`` series (:issue:`6462`)
- Bug in ``resample`` with a timezone and certain offsets (:issue:`6397`)
- Bug in ``iat/iloc`` with duplicate indices on a Series (:issue:`6493`)

pandas 0.13.1
-------------
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1519,24 +1519,30 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover
#----------------------------------------------------------------------
# Getting and setting elements

def get_value(self, index, col):
def get_value(self, index, col, takeable=False):
"""
Quickly retrieve single value at passed column and index

Parameters
----------
index : row label
col : column label
takeable : interpret the index/col as indexers, default False

Returns
-------
value : scalar value
"""

if takeable is True:
series = self._iget_item_cache(col)
return series.values[index]

series = self._get_item_cache(col)
engine = self.index._engine
return engine.get_value(series.values, index)

def set_value(self, index, col, value):
def set_value(self, index, col, value, takeable=False):
"""
Put single value at passed column and index

Expand All @@ -1545,6 +1551,7 @@ def set_value(self, index, col, value):
index : row label
col : column label
value : scalar value
takeable : interpret the index/col as indexers, default False

Returns
-------
Expand All @@ -1553,6 +1560,10 @@ def set_value(self, index, col, value):
otherwise a new object
"""
try:
if takeable is True:
series = self._iget_item_cache(col)
return series.set_value(index, value, takeable=True)

series = self._get_item_cache(col)
engine = self.index._engine
engine.set_value(series.values, index, value)
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,7 @@ def __getitem__(self, item):
return self._get_item_cache(item)

def _get_item_cache(self, item):
""" return the cached item, item represents a label indexer """
cache = self._item_cache
res = cache.get(item)
if res is None:
Expand All @@ -1021,6 +1022,15 @@ def _set_as_cached(self, item, cacher):
a weakref to cacher """
self._cacher = (item, weakref.ref(cacher))

def _iget_item_cache(self, item):
""" return the cached item, item represents a positional indexer """
ax = self._info_axis
if ax.is_unique:
lower = self._get_item_cache(ax[item])
else:
lower = self.take(item, axis=self._info_axis_number, convert=True)
return lower

def _box_item_values(self, key, values):
raise NotImplementedError

Expand Down Expand Up @@ -1595,7 +1605,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy,

obj = obj._reindex_with_indexers(
{axis: [new_index, indexer]}, method=method,
fill_value=fill_value, limit=limit, copy=copy)
fill_value=fill_value, limit=limit, copy=copy,
allow_dups=takeable)

return obj

Expand Down
13 changes: 6 additions & 7 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,41 +1419,40 @@ def __getitem__(self, key):
raise ValueError('Invalid call for scalar access (getting)!')

key = self._convert_key(key)
return self.obj.get_value(*key)
return self.obj.get_value(*key, takeable=self._takeable)

def __setitem__(self, key, value):
if not isinstance(key, tuple):
key = self._tuplify(key)
if len(key) != self.obj.ndim:
raise ValueError('Not enough indexers for scalar access '
'(setting)!')
key = self._convert_key(key)
key = list(self._convert_key(key))
key.append(value)
self.obj.set_value(*key)
self.obj.set_value(*key, takeable=self._takeable)


class _AtIndexer(_ScalarAccessIndexer):

""" label based scalar accessor """
pass
_takeable = False


class _iAtIndexer(_ScalarAccessIndexer):

""" integer based scalar accessor """
_takeable = True

def _has_valid_setitem_indexer(self, indexer):
self._has_valid_positional_setitem_indexer(indexer)

def _convert_key(self, key):
""" require integer args (and convert to label arguments) """
ckey = []
for a, i in zip(self.obj.axes, key):
if not com.is_integer(i):
raise ValueError("iAt based indexing can only have integer "
"indexers")
ckey.append(a[i])
return ckey
return key

# 32-bit floating point machine epsilon
_eps = np.finfo('f4').eps
Expand Down
25 changes: 18 additions & 7 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def as_matrix(self):
#----------------------------------------------------------------------
# Getting and setting elements

def get_value(self, *args):
def get_value(self, *args, **kwargs):
"""
Quickly retrieve single value at (item, major, minor) location

Expand All @@ -453,6 +453,7 @@ def get_value(self, *args):
item : item label (panel item)
major : major axis label (panel item row)
minor : minor axis label (panel item column)
takeable : interpret the passed labels as indexers, default False

Returns
-------
Expand All @@ -466,12 +467,16 @@ def get_value(self, *args):
raise TypeError('There must be an argument for each axis, you gave'
' {0} args, but {1} are required'.format(nargs,
nreq))
takeable = kwargs.get('takeable')

# hm, two layers to the onion
frame = self._get_item_cache(args[0])
return frame.get_value(*args[1:])
if takeable is True:
lower = self._iget_item_cache(args[0])
else:
lower = self._get_item_cache(args[0])

return lower.get_value(*args[1:], takeable=takeable)

def set_value(self, *args):
def set_value(self, *args, **kwargs):
"""
Quickly set single value at (item, major, minor) location

Expand All @@ -481,6 +486,7 @@ def set_value(self, *args):
major : major axis label (panel item row)
minor : minor axis label (panel item column)
value : scalar
takeable : interpret the passed labels as indexers, default False

Returns
-------
Expand All @@ -496,10 +502,15 @@ def set_value(self, *args):
raise TypeError('There must be an argument for each axis plus the '
'value provided, you gave {0} args, but {1} are '
'required'.format(nargs, nreq))
takeable = kwargs.get('takeable')

try:
frame = self._get_item_cache(args[0])
frame.set_value(*args[1:])
if takeable is True:
lower = self._iget_item_cache(args[0])
else:
lower = self._get_item_cache(args[0])

lower.set_value(*args[1:], takeable=takeable)
return self
except KeyError:
axes = self._expand_axes(args)
Expand Down
13 changes: 10 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,21 +725,24 @@ def reshape(self, *args, **kwargs):
iget = _ixs
irow = _ixs

def get_value(self, label):
def get_value(self, label, takeable=False):
"""
Quickly retrieve single value at passed index label

Parameters
----------
index : label
takeable : interpret the index as indexers, default False

Returns
-------
value : scalar value
"""
if takeable is True:
return self.values[label]
return self.index.get_value(self.values, label)

def set_value(self, label, value):
def set_value(self, label, value, takeable=False):
"""
Quickly set single value at passed label. If label is not contained, a
new object is created with the label placed at the end of the result
Expand All @@ -751,6 +754,7 @@ def set_value(self, label, value):
Partial indexing with MultiIndex not allowed
value : object
Scalar value
takeable : interpret the index as indexers, default False

Returns
-------
Expand All @@ -759,7 +763,10 @@ def set_value(self, label, value):
otherwise a new object
"""
try:
self.index._engine.set_value(self.values, label, value)
if takeable:
self.values[label] = value
else:
self.index._engine.set_value(self.values, label, value)
return self
except KeyError:

Expand Down
14 changes: 10 additions & 4 deletions pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,15 @@ def __getitem__(self, key):
return self._get_item_cache(key)

@Appender(DataFrame.get_value.__doc__, indents=0)
def get_value(self, index, col):
return self._get_item_cache(col).get_value(index)
def get_value(self, index, col, takeable=False):
if takeable is True:
series = self._iget_item_cache(col)
else:
series = self._get_item_cache(col)

return series.get_value(index, takeable=takeable)

def set_value(self, index, col, value):
def set_value(self, index, col, value, takeable=False):
"""
Put single value at passed column and index

Expand All @@ -358,6 +363,7 @@ def set_value(self, index, col, value):
index : row label
col : column label
value : scalar value
takeable : interpret the index/col as indexers, default False

Notes
-----
Expand All @@ -369,7 +375,7 @@ def set_value(self, index, col, value):
-------
frame : DataFrame
"""
dense = self.to_dense().set_value(index, col, value)
dense = self.to_dense().set_value(index, col, value, takeable=takeable)
return dense.to_sparse(kind=self._default_kind,
fill_value=self._default_fill_value)

Expand Down
10 changes: 6 additions & 4 deletions pandas/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,22 +409,23 @@ def get(self, label, default=None):
else:
return default

def get_value(self, label):
def get_value(self, label, takeable=False):
"""
Retrieve single value at passed index label

Parameters
----------
index : label
takeable : interpret the index as indexers, default False

Returns
-------
value : scalar value
"""
loc = self.index.get_loc(label)
loc = label if takeable is True else self.index.get_loc(label)
return self._get_val_at(loc)

def set_value(self, label, value):
def set_value(self, label, value, takeable=False):
"""
Quickly set single value at passed label. If label is not contained, a
new object is created with the label placed at the end of the result
Expand All @@ -436,6 +437,7 @@ def set_value(self, label, value):
Partial indexing with MultiIndex not allowed
value : object
Scalar value
takeable : interpret the index as indexers, default False

Notes
-----
Expand All @@ -450,7 +452,7 @@ def set_value(self, label, value):

# if the label doesn't exist, we will create a new object here
# and possibily change the index
new_values = values.set_value(label, value)
new_values = values.set_value(label, value, takeable=takeable)
if new_values is not None:
values = new_values
new_index = values.index
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,33 @@ def test_at_timestamp(self):
def test_iat_invalid_args(self):
pass

def test_imethods_with_dups(self):

# GH6493
# iat/iloc with dups

s = Series(range(5), index=[1,1,2,2,3])
result = s.iloc[2]
self.assertEqual(result,2)
result = s.iat[2]
self.assertEqual(result,2)

self.assertRaises(IndexError, lambda : s.iat[10])
self.assertRaises(IndexError, lambda : s.iat[-10])

result = s.iloc[[2,3]]
expected = Series([2,3],[2,2],dtype='int64')
assert_series_equal(result,expected)

df = s.to_frame()
result = df.iloc[2]
expected = Series(2,index=[0])
assert_series_equal(result,expected)

result = df.iat[2,0]
expected = 2
self.assertEqual(result,2)

def test_repeated_getitem_dups(self):
# GH 5678
# repeated gettitems on a dup index returing a ndarray
Expand Down