From 5e765d0632c0aad5e27bd93be12137b0721eb6b4 Mon Sep 17 00:00:00 2001
From: Nicolas Bonnotte <nicolas.bonnotte@gmail.com>
Date: Thu, 28 Jan 2016 10:09:01 +0100
Subject: [PATCH] BUG in MultiIndex.drop for not-lexsorted multi-indexes,
 #12078

Closes #12078
---
 doc/source/whatsnew/v0.18.0.txt    |  4 ++--
 pandas/indexes/multi.py            | 16 +++++++++++++++-
 pandas/tests/indexes/test_multi.py | 23 +++++++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index 3a188ea20f8a3..7d312165fab74 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -523,7 +523,7 @@ Bug Fixes
 - Bug in ``read_sql`` with ``pymysql`` connections failing to return chunked data (:issue:`11522`)
 - Bug in ``.to_csv`` ignoring formatting parameters ``decimal``, ``na_rep``, ``float_format`` for float indexes (:issue:`11553`)
 - Bug in ``Int64Index`` and ``Float64Index`` preventing the use of the modulo operator (:issue:`9244`)
-
+- Bug in ``MultiIndex.drop`` for not-lexsorted multi-indexes (:issue:`12078`)
 
 - Bug in ``DataFrame`` when masking an empty ``DataFrame`` (:issue:`11859`)
 
@@ -544,4 +544,4 @@ Bug Fixes
 
 - Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)
 
-- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`) 
+- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 2d0ad1925daa0..1b7f057de9677 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -1083,10 +1083,24 @@ def drop(self, labels, level=None, errors='raise'):
         for label in labels:
             try:
                 loc = self.get_loc(label)
+                # get_loc returns either an integer, a slice, or a boolean
+                # mask; each form is converted to integer positions below
                 if isinstance(loc, int):
                     inds.append(loc)
-                else:
+                elif isinstance(loc, slice):
                     inds.extend(lrange(loc.start, loc.stop))
+                elif is_bool_indexer(loc):
+                    if self.lexsort_depth == 0:
+                        warnings.warn('dropping on a non-lexsorted multi-index'
+                                      ' without a level parameter may impact '
+                                      'performance.',
+                                      PerformanceWarning,
+                                      stacklevel=2)
+                    loc = loc.nonzero()[0]
+                    inds.extend(loc)
+                else:
+                    msg = 'unsupported indexer of type {}'.format(type(loc))
+                    raise AssertionError(msg)
             except KeyError:
                 if errors != 'ignore':
                     raise
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 6bc644d84b0d0..6d49f5dcb342e 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -8,6 +8,7 @@
 
 from pandas import (date_range, MultiIndex, Index, CategoricalIndex,
                     compat)
+from pandas.io.common import PerformanceWarning
 from pandas.indexes.base import InvalidIndexError
 from pandas.compat import range, lrange, u, PY3, long, lzip
 
@@ -1419,6 +1420,28 @@ def test_droplevel_multiple(self):
         expected = index[:2].droplevel(2).droplevel(0)
         self.assertTrue(dropped.equals(expected))
 
+    def test_drop_not_lexsorted(self):
+        # GH 12078: drop must give the same result on a not-lexsorted index
+
+        # build the reference multi-index, which is lexsorted
+        tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')]
+        lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c'])
+        self.assertTrue(lexsorted_mi.is_lexsorted())
+
+        # and an equal but not-lexsorted version, obtained through a pivot
+        df = pd.DataFrame(columns=['a', 'b', 'c', 'd'],
+                          data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]])
+        df = df.pivot_table(index='a', columns=['b', 'c'], values='d')
+        df = df.reset_index()
+        not_lexsorted_mi = df.columns
+        self.assertFalse(not_lexsorted_mi.is_lexsorted())
+
+        # both must compare equal, and drop must agree (with a warning)
+        self.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
+        with self.assert_produces_warning(PerformanceWarning):
+            self.assert_index_equal(lexsorted_mi.drop('a'),
+                                    not_lexsorted_mi.drop('a'))
+
     def test_insert(self):
         # key contained in all levels
         new_index = self.index.insert(0, ('bar', 'two'))