From 8f3ed7fdf84fd3adc01b663d9f664310f332dd63 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Mon, 20 May 2013 15:16:43 -0400
Subject: [PATCH 1/2] BUG: Non-unique indexing via ``loc`` and friends fixed
 (GH3659_)

BUG: deal with non_monotonic indices

CLN: convert slice_locs arrays to sliced ranges if possible
---
 RELEASE.rst                   |  2 ++
 pandas/core/index.py          | 66 ++++++++++++++++++++++++++++++++---
 pandas/core/indexing.py       |  1 +
 pandas/tests/test_indexing.py | 24 +++++++++++++
 4 files changed, 89 insertions(+), 4 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index 9b3cc3683c3de..e02ad66252bdc 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -115,6 +115,7 @@ pandas 0.11.1
       and handle missing elements like unique indices (GH3561_)
     - Duplicate indexes with and empty DataFrame.from_records will return a correct frame (GH3562_)
     - Concat to produce a non-unique columns when duplicates are across dtypes is fixed (GH3602_)
+    - Non-unique indexing with a slice via ``loc`` and friends fixed (GH3659_)
   - Fixed bug in groupby with empty series referencing a variable before assignment. (GH3510_)
   - Fixed bug in mixed-frame assignment with aligned series (GH3492_)
   - Fixed bug in selecting month/quarter/year from a series would not select the time element
@@ -215,6 +216,7 @@ pandas 0.11.1
 .. _GH3638: https://github.com/pydata/pandas/issues/3638
 .. _GH3605: https://github.com/pydata/pandas/issues/3605
 .. _GH3606: https://github.com/pydata/pandas/issues/3606
+.. _GH3659: https://github.com/pydata/pandas/issues/3659
 .. _Gh3616: https://github.com/pydata/pandas/issues/3616
 
 pandas 0.11.0
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 3e5a4f5676437..cad1186c6addf 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -1219,13 +1219,39 @@ def slice_locs(self, start=None, end=None):
         -----
         This function assumes that the data is sorted, so use at your own peril
         """
+
+        is_unique = self.is_unique
         if start is None:
-            start_slice = 0
+            if is_unique:
+                start_slice = 0
+            else:
+                start_slice = np.arange(len(self))
         else:
             try:
                 start_slice = self.get_loc(start)
-                if isinstance(start_slice, slice):
+                
+                if not is_unique:
+
+                    # get_loc will return a boolean array for non_uniques
+                    # if we are not monotonic
+                    if isinstance(start_slice,np.ndarray):
+                        if not self.is_monotonic:
+                            raise KeyError("cannot peform a slice operation "
+                                           "on a non-unique non-monotonic index")
+                        start_slice = np.arange(len(self))[start_slice]
+
+                    # select all in the slice + all the rest of the entries
+                    # to the right
+                    elif isinstance(start_slice, slice):
+                        ss = np.arange(start_slice.stop,len(self))
+                        start_slice = np.arange(len(self))[start_slice]
+                        start_slice = (Index(ss) | Index(start_slice)).values
+                    else:
+                        start_slice = np.arange(start_slice,len(self))
+
+                elif isinstance(start_slice, slice):
                     start_slice = start_slice.start
+
             except KeyError:
                 if self.is_monotonic:
                     start_slice = self.searchsorted(start, side='left')
@@ -1233,20 +1259,52 @@ def slice_locs(self, start=None, end=None):
                     raise
 
         if end is None:
-            end_slice = len(self)
+            if is_unique:
+                end_slice = len(self)
+            else:
+                end_slice = np.arange(len(self))
         else:
             try:
                 end_slice = self.get_loc(end)
-                if isinstance(end_slice, slice):
+
+                if not is_unique:
+
+                    # get_loc will return a boolean array for non_uniques
+                    if isinstance(end_slice,np.ndarray):
+                        if not self.is_monotonic:
+                            raise KeyError("cannot perform a slice operation "
+                                           "on a non-unique non-monotonic index")
+                        end_slice = np.arange(len(self))[end_slice]
+                        
+                    # select all in the slice + all to the left of the entries
+                    elif isinstance(end_slice, slice):
+                        es = np.arange(0,end_slice.start)
+                        end_slice = np.arange(len(self))[end_slice]
+                        end_slice = (Index(es) | Index(end_slice)).values
+                    else:
+                        end_slice = np.arange(0,end_slice+1)
+
+                elif isinstance(end_slice, slice):
                     end_slice = end_slice.stop
                 else:
                     end_slice += 1
+
             except KeyError:
                 if self.is_monotonic:
                     end_slice = self.searchsorted(end, side='right')
                 else:
                     raise
 
+        if not is_unique:
+            # see if we can convert back to and edge slice
+            if len(start_slice) == len(end_slice) and (start_slice == end_slice).all():
+                start_slice, end_slice = start_slice[0], start_slice[-1]+1
+            # partial slice
+            elif (len(start_slice) == start_slice[-1]-start_slice[0]+1) and (
+                len(end_slice) == end_slice[-1]-end_slice[0]+1):
+                res = (Index(start_slice) & Index(end_slice)).values
+                start_slice, end_slice = res[0],res[-1]+1
+
         return start_slice, end_slice
 
     def delete(self, loc):
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index ea684ef11446c..41f20cbcc15ac 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -759,6 +759,7 @@ def _getitem_axis(self, key, axis=0):
         labels = self.obj._get_axis(axis)
 
         if isinstance(key, slice):
+            self._has_valid_type(key,axis)
             return self._get_slice_axis(key, axis=axis)
         elif com._is_bool_indexer(key):
             return self._getbool_axis(key, axis=axis)
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
index e9afa1ae6ec1d..5891e8ac08040 100644
--- a/pandas/tests/test_indexing.py
+++ b/pandas/tests/test_indexing.py
@@ -953,6 +953,30 @@ def test_iloc_mask(self):
                                          (key,ans,r))
         warnings.filterwarnings(action='always', category=UserWarning)
 
+    def test_non_unique_loc(self):
+        ## GH3659
+        ## non-unique indexer with loc slice
+        ## https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
+
+        # these are going to raise because the index is non-monotonic
+        df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]) 
+        self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,None)]))
+        self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,None)]))
+        self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,2)]))
+
+        # monotonic are ok
+        df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]).sort(axis=0)
+        result = df.loc[1:]
+        expected = DataFrame({'A' : [2,4,5,6], 'B' : [4, 6,7,8]}, index = [1,1,2,3])
+        assert_frame_equal(result,expected)
+
+        result = df.loc[0:]
+        assert_frame_equal(result,df)
+
+        result = df.loc[1:2]
+        expected = DataFrame({'A' : [2,4,5], 'B' : [4,6,7]}, index = [1,1,2])
+        assert_frame_equal(result,expected)
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

From c47bc50bef5b083f0aac64df5338c3fc642b2ab5 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Mon, 20 May 2013 17:45:26 -0400
Subject: [PATCH 2/2] CLN: did not need to convert to index array/slicer as the
 only

    time this happens is when a boolean array comes back from get_loc, which
    means the index is non_monotonic, and that is an exception in any event
---
 pandas/core/index.py | 53 +++++++-------------------------------------
 1 file changed, 8 insertions(+), 45 deletions(-)

diff --git a/pandas/core/index.py b/pandas/core/index.py
index cad1186c6addf..3a6913a924c1d 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -1222,10 +1222,7 @@ def slice_locs(self, start=None, end=None):
 
         is_unique = self.is_unique
         if start is None:
-            if is_unique:
-                start_slice = 0
-            else:
-                start_slice = np.arange(len(self))
+            start_slice = 0
         else:
             try:
                 start_slice = self.get_loc(start)
@@ -1235,21 +1232,10 @@ def slice_locs(self, start=None, end=None):
                     # get_loc will return a boolean array for non_uniques
                     # if we are not monotonic
                     if isinstance(start_slice,np.ndarray):
-                        if not self.is_monotonic:
-                            raise KeyError("cannot peform a slice operation "
-                                           "on a non-unique non-monotonic index")
-                        start_slice = np.arange(len(self))[start_slice]
-
-                    # select all in the slice + all the rest of the entries
-                    # to the right
-                    elif isinstance(start_slice, slice):
-                        ss = np.arange(start_slice.stop,len(self))
-                        start_slice = np.arange(len(self))[start_slice]
-                        start_slice = (Index(ss) | Index(start_slice)).values
-                    else:
-                        start_slice = np.arange(start_slice,len(self))
+                        raise KeyError("cannot peform a slice operation "
+                                       "on a non-unique non-monotonic index")
 
-                elif isinstance(start_slice, slice):
+                if isinstance(start_slice, slice):
                     start_slice = start_slice.start
 
             except KeyError:
@@ -1259,10 +1245,7 @@ def slice_locs(self, start=None, end=None):
                     raise
 
         if end is None:
-            if is_unique:
-                end_slice = len(self)
-            else:
-                end_slice = np.arange(len(self))
+            end_slice = len(self)
         else:
             try:
                 end_slice = self.get_loc(end)
@@ -1271,20 +1254,10 @@ def slice_locs(self, start=None, end=None):
 
                     # get_loc will return a boolean array for non_uniques
                     if isinstance(end_slice,np.ndarray):
-                        if not self.is_monotonic:
-                            raise KeyError("cannot perform a slice operation "
-                                           "on a non-unique non-monotonic index")
-                        end_slice = np.arange(len(self))[end_slice]
-                        
-                    # select all in the slice + all to the left of the entries
-                    elif isinstance(end_slice, slice):
-                        es = np.arange(0,end_slice.start)
-                        end_slice = np.arange(len(self))[end_slice]
-                        end_slice = (Index(es) | Index(end_slice)).values
-                    else:
-                        end_slice = np.arange(0,end_slice+1)
+                        raise KeyError("cannot perform a slice operation "
+                                       "on a non-unique non-monotonic index")
 
-                elif isinstance(end_slice, slice):
+                if isinstance(end_slice, slice):
                     end_slice = end_slice.stop
                 else:
                     end_slice += 1
@@ -1295,16 +1268,6 @@ def slice_locs(self, start=None, end=None):
                 else:
                     raise
 
-        if not is_unique:
-            # see if we can convert back to and edge slice
-            if len(start_slice) == len(end_slice) and (start_slice == end_slice).all():
-                start_slice, end_slice = start_slice[0], start_slice[-1]+1
-            # partial slice
-            elif (len(start_slice) == start_slice[-1]-start_slice[0]+1) and (
-                len(end_slice) == end_slice[-1]-end_slice[0]+1):
-                res = (Index(start_slice) & Index(end_slice)).values
-                start_slice, end_slice = res[0],res[-1]+1
-
         return start_slice, end_slice
 
     def delete(self, loc):