Skip to content

Commit bbcfd92

Browse files
committed
ENH: implement non-unique indexing in series (GH4246)
DOC: release notes
1 parent 50eff60 commit bbcfd92

File tree

6 files changed

+29
-9
lines changed

6 files changed

+29
-9
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ pandas 0.12
235235
names (:issue:`3873`)
236236
- Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to
237237
``reindex`` for location-based taking
238+
- Allow non-unique indexing in series via ``.ix/.loc`` and ``__getitem__`` (:issue:`4246`)
238239
239240
- Fixed bug in groupby with empty series referencing a variable before assignment. (:issue:`3510`)
240241
- Allow index name to be used in groupby for non MultiIndex (:issue:`4014`)

doc/source/v0.12.0.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ Bug Fixes
437437
names (:issue:`3873`)
438438
- Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to
439439
``reindex`` for location-based taking
440+
- Allow non-unique indexing in series via ``.ix/.loc`` and ``__getitem__`` (:issue:`4246`)
440441

441442
- ``DataFrame.from_records`` did not accept empty recarrays (:issue:`3682`)
442443
- ``read_html`` now correctly skips tests (:issue:`3741`)
@@ -462,7 +463,7 @@ Bug Fixes
462463
(:issue:`4089`)
463464
- Fixed bug in ``DataFrame.replace`` where a nested dict wasn't being
464465
iterated over when regex=False (:issue:`4115`)
465-
- Fixed bug in the parsing of microseconds when using the ``format``
466+
- Fixed bug in the parsing of microseconds when using the ``format``
466467
argument in ``to_datetime`` (:issue:`4152`)
467468
- Fixed bug in ``PandasAutoDateLocator`` where ``invert_xaxis`` triggered
468469
incorrectly ``MilliSecondLocator`` (:issue:`3990`)

pandas/core/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -928,7 +928,7 @@ def reindex(self, target, method=None, level=None, limit=None,
928928
if method is not None or limit is not None:
929929
raise ValueError("cannot reindex a non-unique index "
930930
"with a method or limit")
931-
indexer, _ = self.get_indexer_non_unique(target)
931+
indexer, missing = self.get_indexer_non_unique(target)
932932

933933
return target, indexer
934934

pandas/core/indexing.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -481,12 +481,12 @@ def _reindex(keys, level=None):
481481
new_indexer = (Index(cur_indexer) + Index(missing_indexer)).values
482482
new_indexer[missing_indexer] = -1
483483

484-
# need to reindex with an indexer on a specific axis
485-
from pandas.core.frame import DataFrame
486-
if not (type(self.obj) == DataFrame):
487-
raise NotImplementedError("cannot handle non-unique indexing for non-DataFrame (yet)")
484+
# reindex with the specified axis
485+
ndim = self.obj.ndim
486+
if axis+1 > ndim:
487+
raise AssertionError("invalid indexing error with non-unique index")
488488

489-
args = [None] * 4
489+
args = [None] * (2*ndim)
490490
args[2*axis] = new_labels
491491
args[2*axis+1] = new_indexer
492492

pandas/core/series.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,10 @@ def _get_with(self, key):
681681
return self._get_values(key)
682682
else:
683683
try:
684+
# handle the dup indexing case (GH 4246)
685+
if isinstance(key, (list,tuple)):
686+
return self.ix[key]
687+
684688
return self.reindex(key)
685689
except Exception:
686690
# [slice(0, 5, None)] will break if you convert to ndarray,
@@ -2637,8 +2641,13 @@ def reindex(self, index=None, method=None, level=None, fill_value=pa.NA,
26372641
new_index, indexer = self.index.reindex(index, method=method,
26382642
level=level, limit=limit,
26392643
takeable=takeable)
2644+
2645+
# GH4246 (dispatch to a common method with frame to handle possibly duplicate index)
2646+
return self._reindex_with_indexers(new_index, indexer, copy=copy, fill_value=fill_value)
2647+
2648+
def _reindex_with_indexers(self, index, indexer, copy, fill_value):
26402649
new_values = com.take_1d(self.values, indexer, fill_value=fill_value)
2641-
return Series(new_values, index=new_index, name=self.name)
2650+
return Series(new_values, index=index, name=self.name)
26422651

26432652
def reindex_axis(self, labels, axis=0, **kwargs):
26442653
""" for compatibility with higher dims """

pandas/tests/test_series.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,15 @@ def test_getitem_unordered_dup(self):
788788
self.assert_(np.isscalar(obj['c']))
789789
self.assert_(obj['c'] == 0)
790790

791+
def test_getitem_dups_with_missing(self):
792+
793+
# breaks reindex, so need to use .ix internally
794+
# GH 4246
795+
s = Series([1,2,3,4],['foo','bar','foo','bah'])
796+
expected = s.ix[['foo','bar','bah','bam']]
797+
result = s[['foo','bar','bah','bam']]
798+
assert_series_equal(result,expected)
799+
791800
def test_setitem_ambiguous_keyerror(self):
792801
s = Series(range(10), index=range(0, 20, 2))
793802
self.assertRaises(KeyError, s.__setitem__, 1, 5)
@@ -1141,7 +1150,7 @@ def test_where(self):
11411150
s = Series(np.arange(10))
11421151
mask = s > 5
11431152
self.assertRaises(ValueError, s.__setitem__, mask, ([0]*5,))
1144-
1153+
11451154
def test_where_broadcast(self):
11461155
# Test a variety of differently sized series
11471156
for size in range(2, 6):

0 commit comments

Comments
 (0)