Skip to content

Commit 89ada20

Browse files
committed
Merge pull request #3661 from jreback/loc_non_unique
BUG: Non-unique indexing via loc and friends fixed when slicing (GH3659_)
2 parents 88d892a + c47bc50 commit 89ada20

File tree

4 files changed

+48
-0
lines changed

4 files changed

+48
-0
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ pandas 0.11.1
115115
and handle missing elements like unique indices (GH3561_)
116116
- Duplicate indexes with an empty DataFrame.from_records will return a correct frame (GH3562_)
117117
- Concat to produce a non-unique columns when duplicates are across dtypes is fixed (GH3602_)
118+
- Non-unique indexing with a slice via ``loc`` and friends fixed (GH3659_)
118119
- Fixed bug in groupby with empty series referencing a variable before assignment. (GH3510_)
119120
- Fixed bug in mixed-frame assignment with aligned series (GH3492_)
120121
- Fixed bug in selecting month/quarter/year from a series would not select the time element
@@ -215,6 +216,7 @@ pandas 0.11.1
215216
.. _GH3638: https://github.com/pydata/pandas/issues/3638
216217
.. _GH3605: https://github.com/pydata/pandas/issues/3605
217218
.. _GH3606: https://github.com/pydata/pandas/issues/3606
219+
.. _GH3659: https://github.com/pydata/pandas/issues/3659
218220
.. _Gh3616: https://github.com/pydata/pandas/issues/3616
219221

220222
pandas 0.11.0

pandas/core/index.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,13 +1219,25 @@ def slice_locs(self, start=None, end=None):
12191219
-----
12201220
This function assumes that the data is sorted, so use at your own peril
12211221
"""
1222+
1223+
is_unique = self.is_unique
12221224
if start is None:
12231225
start_slice = 0
12241226
else:
12251227
try:
12261228
start_slice = self.get_loc(start)
1229+
1230+
if not is_unique:
1231+
1232+
# get_loc will return a boolean array for non_uniques
1233+
# if we are not monotonic
1234+
if isinstance(start_slice,np.ndarray):
1235+
raise KeyError("cannot peform a slice operation "
1236+
"on a non-unique non-monotonic index")
1237+
12271238
if isinstance(start_slice, slice):
12281239
start_slice = start_slice.start
1240+
12291241
except KeyError:
12301242
if self.is_monotonic:
12311243
start_slice = self.searchsorted(start, side='left')
@@ -1237,10 +1249,19 @@ def slice_locs(self, start=None, end=None):
12371249
else:
12381250
try:
12391251
end_slice = self.get_loc(end)
1252+
1253+
if not is_unique:
1254+
1255+
# get_loc will return a boolean array for non_uniques
1256+
if isinstance(end_slice,np.ndarray):
1257+
raise KeyError("cannot perform a slice operation "
1258+
"on a non-unique non-monotonic index")
1259+
12401260
if isinstance(end_slice, slice):
12411261
end_slice = end_slice.stop
12421262
else:
12431263
end_slice += 1
1264+
12441265
except KeyError:
12451266
if self.is_monotonic:
12461267
end_slice = self.searchsorted(end, side='right')

pandas/core/indexing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,7 @@ def _getitem_axis(self, key, axis=0):
759759
labels = self.obj._get_axis(axis)
760760

761761
if isinstance(key, slice):
762+
self._has_valid_type(key,axis)
762763
return self._get_slice_axis(key, axis=axis)
763764
elif com._is_bool_indexer(key):
764765
return self._getbool_axis(key, axis=axis)

pandas/tests/test_indexing.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,30 @@ def test_iloc_mask(self):
953953
(key,ans,r))
954954
warnings.filterwarnings(action='always', category=UserWarning)
955955

956+
def test_non_unique_loc(self):
957+
## GH3659
958+
## non-unique indexer with loc slice
959+
## https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
960+
961+
# these are going to raise because we are non-monotonic
962+
df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3])
963+
self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,None)]))
964+
self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,None)]))
965+
self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,2)]))
966+
967+
# monotonic are ok
968+
df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]).sort(axis=0)
969+
result = df.loc[1:]
970+
expected = DataFrame({'A' : [2,4,5,6], 'B' : [4, 6,7,8]}, index = [1,1,2,3])
971+
assert_frame_equal(result,expected)
972+
973+
result = df.loc[0:]
974+
assert_frame_equal(result,df)
975+
976+
result = df.loc[1:2]
977+
expected = DataFrame({'A' : [2,4,5], 'B' : [4,6,7]}, index = [1,1,2])
978+
assert_frame_equal(result,expected)
979+
956980
if __name__ == '__main__':
957981
import nose
958982
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)