Merge branch 'master' of git://github.com/pandas-dev/pandas into clip

yl2526 · yl2526 · commit 3ecfba13ab86 · 2017-06-21T20:59:18.000-04:00
diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
@@ -50,13 +50,13 @@ Conversion
 Indexing
 ^^^^^^^^
 
-- Bug in ``Float64Index`` causing an empty array instead of None to be returned from ``.get(np.nan)`` on a Series whose index did not contain any NaNs (:issue:`8569`)
+- Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`)
 
 I/O
 ^^^
 
--- Bug in ``pd.read_csv()`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue`16675`)
--- Bug in ``pd.read_hdf()`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
+- Bug in :func:`read_csv`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
+- Bug in :func:`read_hdf`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
 
 Plotting
 ^^^^^^^^
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -22,8 +22,8 @@ New features
 
 - Support for `PEP 519 -- Adding a file system path protocol
   <https://www.python.org/dev/peps/pep-0519/>`_ on most readers and writers (:issue:`13823`)
-- Added `__fspath__` method to :class`:pandas.HDFStore`, :class:`pandas.ExcelFile`,
-  and :class:`pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`)
+- Added ``__fspath__`` method to :class:`~pandas.HDFStore`, :class:`~pandas.ExcelFile`,
+  and :class:`~pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`)
 
 .. _whatsnew_0210.enhancements.other:
 
@@ -33,12 +33,12 @@ Other Enhancements
 - The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here <merging.validation>` (:issue:`16270`)
 - ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned.  The default is ``dict``, which is backwards compatible. (:issue:`16122`)
 - ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`)
-- ``Series.rename_axis()`` and ``DataFrame.rename_axis()`` with ``inplace=True`` now return None while renaming the axis inplace. (:issue:`15704`)
-- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__
-- :func:`api.types.infer_dtype` now infers decimals. (:issue: `15690`)
+- ``Series.rename_axis()`` and ``DataFrame.rename_axis()`` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`)
+- :func:`to_pickle` has gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__
+- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`)
 - :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
-- :func:`DataFrame.clip()` and :func: `Series.cip()` have gained an inplace argument. (:issue: `15388`)
-- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when margins=True. (:issue:`15972`)
+- :func:`DataFrame.clip()` and :func: `Series.clip()` have gained an inplace argument. (:issue:`15388`)
+- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
 
 .. _whatsnew_0210.api_breaking:
 
@@ -98,13 +98,14 @@ Conversion
 Indexing
 ^^^^^^^^
 
-- When called with a null slice (e.g. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
+- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
+- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`).
 
 
 I/O
 ^^^
 
-- Bug in ``pd.read_csv()`` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`)
+- Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`)
 
 
 Plotting
@@ -114,7 +115,7 @@ Plotting
 
 Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
-- Bug in ``DataFrame.resample().size()`` where an empty DataFrame did not return a Series (:issue:`14962`)
+- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
 
 
 
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -411,6 +411,13 @@ def is_null_slice(obj):
             obj.stop is None and obj.step is None)
 
 
+def is_true_slices(l):
+    """
+    Find non-trivial slices in "l": return a list of booleans with same length.
+    """
+    return [isinstance(k, slice) and not is_null_slice(k) for k in l]
+
+
 def is_full_slice(obj, l):
     """ we have a full length slice """
     return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -23,7 +23,8 @@
 from pandas.errors import PerformanceWarning, UnsortedIndexError
 from pandas.core.common import (_values_from_object,
                                 is_bool_indexer,
-                                is_null_slice)
+                                is_null_slice,
+                                is_true_slices)
 
 import pandas.core.base as base
 from pandas.util._decorators import (Appender, cache_readonly,
@@ -1035,12 +1036,6 @@ def is_lexsorted(self):
         """
         return self.lexsort_depth == self.nlevels
 
-    def is_lexsorted_for_tuple(self, tup):
-        """
-        Return True if we are correctly lexsorted given the passed tuple
-        """
-        return len(tup) <= self.lexsort_depth
-
     @cache_readonly
     def lexsort_depth(self):
         if self.sortorder is not None:
@@ -2262,12 +2257,12 @@ def get_locs(self, tup):
         """
 
         # must be lexsorted to at least as many levels
-        if not self.is_lexsorted_for_tuple(tup):
-            raise UnsortedIndexError('MultiIndex Slicing requires the index '
-                                     'to be fully lexsorted tuple len ({0}), '
-                                     'lexsort depth ({1})'
-                                     .format(len(tup), self.lexsort_depth))
-
+        true_slices = [i for (i, s) in enumerate(is_true_slices(tup)) if s]
+        if true_slices and true_slices[-1] >= self.lexsort_depth:
+            raise UnsortedIndexError('MultiIndex slicing requires the index '
+                                     'to be lexsorted: slicing on levels {0}, '
+                                     'lexsort depth {1}'
+                                     .format(true_slices, self.lexsort_depth))
         # indexer
         # this is the list of all values that we want to select
         n = len(self)
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
@@ -2826,8 +2826,13 @@ def test_unsortedindex(self):
         df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
                           columns=['one', 'two'])
 
+        # GH 16734: not sorted, but no real slicing
+        result = df.loc(axis=0)['z', 'a']
+        expected = df.iloc[0]
+        tm.assert_series_equal(result, expected)
+
         with pytest.raises(UnsortedIndexError):
-            df.loc(axis=0)['z', :]
+            df.loc(axis=0)['z', slice('a')]
         df.sort_index(inplace=True)
         assert len(df.loc(axis=0)['z', :]) == 2
 
diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py
@@ -817,9 +817,13 @@ def f():
         assert df.index.lexsort_depth == 0
         with tm.assert_raises_regex(
                 UnsortedIndexError,
-                'MultiIndex Slicing requires the index to be fully '
-                r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
-            df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
+                'MultiIndex slicing requires the index to be '
+                r'lexsorted: slicing on levels \[1\], lexsort depth 0'):
+            df.loc[(slice(None), slice('bar')), :]
+
+        # GH 16734: not sorted, but no real slicing
+        result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
+        tm.assert_frame_equal(result, df.iloc[[1, 3], :])
 
     def test_multiindex_slicers_non_unique(self):
 
@@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self):
 
         # not sorted
         def f():
-            df.loc['A1', (slice(None), 'foo')]
+            df.loc['A1', ('a', slice('foo'))]
 
         pytest.raises(UnsortedIndexError, f)
+
+        # GH 16734: not sorted, but no real slicing
+        tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')],
+                              df.loc['A1'].iloc[:, [0, 2]])
+
         df = df.sort_index(axis=1)
 
         # slicing