Skip to content

Commit 8f3ed7f

Browse files
committed
BUG: Non-unique indexing via loc and friends fixed (GH3659_)
BUG: deal with non_monotonic indices CLN: convert slice_locs arrays to sliced ranges if possible
1 parent 88d892a commit 8f3ed7f

File tree

4 files changed

+89
-4
lines changed

4 files changed

+89
-4
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ pandas 0.11.1
115115
and handle missing elements like unique indices (GH3561_)
116116
- Duplicate indexes with an empty DataFrame.from_records will return a correct frame (GH3562_)
117117
- Concat to produce a non-unique columns when duplicates are across dtypes is fixed (GH3602_)
118+
- Non-unique indexing with a slice via ``loc`` and friends fixed (GH3659_)
118119
- Fixed bug in groupby with empty series referencing a variable before assignment. (GH3510_)
119120
- Fixed bug in mixed-frame assignment with aligned series (GH3492_)
120121
- Fixed bug in selecting month/quarter/year from a series would not select the time element
@@ -215,6 +216,7 @@ pandas 0.11.1
215216
.. _GH3638: https://github.com/pydata/pandas/issues/3638
216217
.. _GH3605: https://github.com/pydata/pandas/issues/3605
217218
.. _GH3606: https://github.com/pydata/pandas/issues/3606
219+
.. _GH3659: https://github.com/pydata/pandas/issues/3659
218220
.. _Gh3616: https://github.com/pydata/pandas/issues/3616
219221

220222
pandas 0.11.0

pandas/core/index.py

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,34 +1219,92 @@ def slice_locs(self, start=None, end=None):
12191219
-----
12201220
This function assumes that the data is sorted, so use at your own peril
12211221
"""
1222+
1223+
is_unique = self.is_unique
12221224
if start is None:
1223-
start_slice = 0
1225+
if is_unique:
1226+
start_slice = 0
1227+
else:
1228+
start_slice = np.arange(len(self))
12241229
else:
12251230
try:
12261231
start_slice = self.get_loc(start)
1227-
if isinstance(start_slice, slice):
1232+
1233+
if not is_unique:
1234+
1235+
# get_loc will return a boolean array for non_uniques
1236+
# if we are not monotonic
1237+
if isinstance(start_slice,np.ndarray):
1238+
if not self.is_monotonic:
1239+
raise KeyError("cannot peform a slice operation "
1240+
"on a non-unique non-monotonic index")
1241+
start_slice = np.arange(len(self))[start_slice]
1242+
1243+
# select all in the slice + all the rest of the entries
1244+
# to the right
1245+
elif isinstance(start_slice, slice):
1246+
ss = np.arange(start_slice.stop,len(self))
1247+
start_slice = np.arange(len(self))[start_slice]
1248+
start_slice = (Index(ss) | Index(start_slice)).values
1249+
else:
1250+
start_slice = np.arange(start_slice,len(self))
1251+
1252+
elif isinstance(start_slice, slice):
12281253
start_slice = start_slice.start
1254+
12291255
except KeyError:
12301256
if self.is_monotonic:
12311257
start_slice = self.searchsorted(start, side='left')
12321258
else:
12331259
raise
12341260

12351261
if end is None:
1236-
end_slice = len(self)
1262+
if is_unique:
1263+
end_slice = len(self)
1264+
else:
1265+
end_slice = np.arange(len(self))
12371266
else:
12381267
try:
12391268
end_slice = self.get_loc(end)
1240-
if isinstance(end_slice, slice):
1269+
1270+
if not is_unique:
1271+
1272+
# get_loc will return a boolean array for non_uniques
1273+
if isinstance(end_slice,np.ndarray):
1274+
if not self.is_monotonic:
1275+
raise KeyError("cannot perform a slice operation "
1276+
"on a non-unique non-monotonic index")
1277+
end_slice = np.arange(len(self))[end_slice]
1278+
1279+
# select all in the slice + all to the left of the entries
1280+
elif isinstance(end_slice, slice):
1281+
es = np.arange(0,end_slice.start)
1282+
end_slice = np.arange(len(self))[end_slice]
1283+
end_slice = (Index(es) | Index(end_slice)).values
1284+
else:
1285+
end_slice = np.arange(0,end_slice+1)
1286+
1287+
elif isinstance(end_slice, slice):
12411288
end_slice = end_slice.stop
12421289
else:
12431290
end_slice += 1
1291+
12441292
except KeyError:
12451293
if self.is_monotonic:
12461294
end_slice = self.searchsorted(end, side='right')
12471295
else:
12481296
raise
12491297

1298+
if not is_unique:
1299+
# see if we can convert back to an edge slice
1300+
if len(start_slice) == len(end_slice) and (start_slice == end_slice).all():
1301+
start_slice, end_slice = start_slice[0], start_slice[-1]+1
1302+
# partial slice
1303+
elif (len(start_slice) == start_slice[-1]-start_slice[0]+1) and (
1304+
len(end_slice) == end_slice[-1]-end_slice[0]+1):
1305+
res = (Index(start_slice) & Index(end_slice)).values
1306+
start_slice, end_slice = res[0],res[-1]+1
1307+
12501308
return start_slice, end_slice
12511309

12521310
def delete(self, loc):

pandas/core/indexing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,7 @@ def _getitem_axis(self, key, axis=0):
759759
labels = self.obj._get_axis(axis)
760760

761761
if isinstance(key, slice):
762+
self._has_valid_type(key,axis)
762763
return self._get_slice_axis(key, axis=axis)
763764
elif com._is_bool_indexer(key):
764765
return self._getbool_axis(key, axis=axis)

pandas/tests/test_indexing.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,30 @@ def test_iloc_mask(self):
953953
(key,ans,r))
954954
warnings.filterwarnings(action='always', category=UserWarning)
955955

956+
def test_non_unique_loc(self):
957+
## GH3659
958+
## non-unique indexer with loc slice
959+
## https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
960+
961+
# these are going to raise because we are non-monotonic
962+
df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3])
963+
self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,None)]))
964+
self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,None)]))
965+
self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,2)]))
966+
967+
# monotonic are ok
968+
df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]).sort(axis=0)
969+
result = df.loc[1:]
970+
expected = DataFrame({'A' : [2,4,5,6], 'B' : [4, 6,7,8]}, index = [1,1,2,3])
971+
assert_frame_equal(result,expected)
972+
973+
result = df.loc[0:]
974+
assert_frame_equal(result,df)
975+
976+
result = df.loc[1:2]
977+
expected = DataFrame({'A' : [2,4,5], 'B' : [4,6,7]}, index = [1,1,2])
978+
assert_frame_equal(result,expected)
979+
956980
if __name__ == '__main__':
957981
import nose
958982
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)