diff --git a/RELEASE.rst b/RELEASE.rst index 9b3cc3683c3de..e02ad66252bdc 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -115,6 +115,7 @@ pandas 0.11.1 and handle missing elements like unique indices (GH3561_) - Duplicate indexes with and empty DataFrame.from_records will return a correct frame (GH3562_) - Concat to produce a non-unique columns when duplicates are across dtypes is fixed (GH3602_) + - Non-unique indexing with a slice via ``loc`` and friends fixed (GH3659_) - Fixed bug in groupby with empty series referencing a variable before assignment. (GH3510_) - Fixed bug in mixed-frame assignment with aligned series (GH3492_) - Fixed bug in selecting month/quarter/year from a series would not select the time element @@ -215,6 +216,7 @@ pandas 0.11.1 .. _GH3638: https://github.com/pydata/pandas/issues/3638 .. _GH3605: https://github.com/pydata/pandas/issues/3605 .. _GH3606: https://github.com/pydata/pandas/issues/3606 +.. _GH3659: https://github.com/pydata/pandas/issues/3659 .. _Gh3616: https://github.com/pydata/pandas/issues/3616 pandas 0.11.0 diff --git a/pandas/core/index.py b/pandas/core/index.py index 3e5a4f5676437..3a6913a924c1d 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1219,13 +1219,25 @@ def slice_locs(self, start=None, end=None): ----- This function assumes that the data is sorted, so use at your own peril """ + + is_unique = self.is_unique if start is None: start_slice = 0 else: try: start_slice = self.get_loc(start) + + if not is_unique: + + # get_loc will return a boolean array for non_uniques + # if we are not monotonic + if isinstance(start_slice,np.ndarray): + raise KeyError("cannot perform a slice operation " + "on a non-unique non-monotonic index") + if isinstance(start_slice, slice): start_slice = start_slice.start + except KeyError: if self.is_monotonic: start_slice = self.searchsorted(start, side='left') @@ -1237,10 +1249,19 @@ def slice_locs(self, start=None, end=None): else: try: end_slice = self.get_loc(end) + + if not 
is_unique: + + # get_loc will return a boolean array for non_uniques + if isinstance(end_slice,np.ndarray): + raise KeyError("cannot perform a slice operation " + "on a non-unique non-monotonic index") + if isinstance(end_slice, slice): end_slice = end_slice.stop else: end_slice += 1 + except KeyError: if self.is_monotonic: end_slice = self.searchsorted(end, side='right') diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ea684ef11446c..41f20cbcc15ac 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -759,6 +759,7 @@ def _getitem_axis(self, key, axis=0): labels = self.obj._get_axis(axis) if isinstance(key, slice): + self._has_valid_type(key,axis) return self._get_slice_axis(key, axis=axis) elif com._is_bool_indexer(key): return self._getbool_axis(key, axis=axis) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index e9afa1ae6ec1d..5891e8ac08040 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -953,6 +953,30 @@ def test_iloc_mask(self): (key,ans,r)) warnings.filterwarnings(action='always', category=UserWarning) + def test_non_unique_loc(self): + ## GH3659 + ## non-unique indexer with loc slice + ## https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + + # these are going to raise because we are non monotonic + df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]) + self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,None)])) + self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,None)])) + self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,2)])) + + # monotonic are ok + df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]).sort(axis=0) + result = df.loc[1:] + expected = DataFrame({'A' : [2,4,5,6], 'B' : [4, 6,7,8]}, index = [1,1,2,3]) + assert_frame_equal(result,expected) + + result = df.loc[0:] + assert_frame_equal(result,df) + + result = df.loc[1:2] + expected 
= DataFrame({'A' : [2,4,5], 'B' : [4,6,7]}, index = [1,1,2]) + assert_frame_equal(result,expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],