diff --git a/doc/source/basics.rst b/doc/source/basics.rst index c2642501791e7..159cd05194300 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1287,9 +1287,18 @@ Some other sorting notes / nuances: * ``Series.sort`` sorts a Series by value in-place. This is to provide compatibility with NumPy methods which expect the ``ndarray.sort`` behavior. - * ``DataFrame.sort`` takes a ``column`` argument instead of ``by``. This - method will likely be deprecated in a future release in favor of just using - ``sort_index``. + +Sorting by a multi-index column +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You must be explicit about sorting when the column is a multi-index, and fully specify +all levels to ``by``. + +.. ipython:: python + + df1.columns = MultiIndex.from_tuples([('a','one'),('a','two'),('b','three')]) + df1.sort_index(by=('a','two')) + Copying ------- diff --git a/doc/source/release.rst b/doc/source/release.rst index 1dbce9f98cafa..257d17bbe321b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -125,6 +125,17 @@ API Changes ``DataFrame.stack`` operations where the name of the column index is used as the name of the inserted column containing the pivoted data. +- A tuple passed to ``DataFrame.sort_index`` will be interpreted as the levels of + the index, rather than requiring a list of tuples (:issue:`4370`) + +Deprecations +~~~~~~~~~~~~ + +Prior Version Deprecations/Changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Remove ``column`` keyword from ``DataFrame.sort`` (:issue:`4370`) + Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 932ed4e1672b7..37e7f5b32ea84 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -276,7 +276,9 @@ You can use a right-hand-side of an alignable object as well. 
Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -There are no announced changes in 0.13.1 or prior that are taking effect as of 0.14.0 +These are prior version deprecations that are taking effect as of 0.14.0. + +- Remove ``column`` keyword from ``DataFrame.sort`` (:issue:`4370`) Deprecations ~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8c5243461b7b9..8a72b0d7c4493 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2530,7 +2530,7 @@ def _m8_to_i8(x): #---------------------------------------------------------------------- # Sorting - def sort(self, columns=None, column=None, axis=0, ascending=True, + def sort(self, columns=None, axis=0, ascending=True, inplace=False): """ Sort DataFrame either by labels (along either axis) or by the values in @@ -2539,8 +2539,9 @@ def sort(self, columns=None, column=None, axis=0, ascending=True, Parameters ---------- columns : object - Column name(s) in frame. Accepts a column name or a list or tuple - for a nested sort. + Column name(s) in frame. Accepts a column name or a list + for a nested sort. A tuple will be interpreted as the + levels of a multi-index. ascending : boolean or list, default True Sort ascending vs. descending. Specify list for multiple sort orders @@ -2557,9 +2558,6 @@ def sort(self, columns=None, column=None, axis=0, ascending=True, ------- sorted : DataFrame """ - if column is not None: # pragma: no cover - warnings.warn("column is deprecated, use columns", FutureWarning) - columns = column return self.sort_index(by=columns, axis=axis, ascending=ascending, inplace=inplace) @@ -2574,8 +2572,9 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False, axis : {0, 1} Sort index/rows versus columns by : object - Column name(s) in frame. Accepts a column name or a list or tuple - for a nested sort. + Column name(s) in frame. Accepts a column name or a list + for a nested sort. 
A tuple will be interpreted as the + levels of a multi-index. ascending : boolean or list, default True Sort ascending vs. descending. Specify list for multiple sort orders @@ -2602,7 +2601,7 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False, if axis != 0: raise ValueError('When sorting by column, axis must be 0 ' '(rows)') - if not isinstance(by, (tuple, list)): + if not isinstance(by, list): by = [by] if com._is_sequence(ascending) and len(by) != len(ascending): raise ValueError('Length of ascending (%d) != length of by' @@ -2629,6 +2628,13 @@ def trans(v): by = by[0] k = self[by].values if k.ndim == 2: + + # try to be helpful + if isinstance(self.columns, MultiIndex): + raise ValueError('Cannot sort by column %s in a multi-index' + ' you need to explicity provide all the levels' + % str(by)) + raise ValueError('Cannot sort by duplicate column %s' % str(by)) if isinstance(ascending, (tuple, list)): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ed88a355cf7a9..8a4207da76c52 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -9797,6 +9797,17 @@ def test_sort_index_duplicates(self): # multi-column 'by' is separate codepath df.sort_index(by=['a', 'b']) + # with multi-index + # GH4370 + df = DataFrame(np.random.randn(4,2),columns=MultiIndex.from_tuples([('a',0),('a',1)])) + with assertRaisesRegexp(ValueError, 'levels'): + df.sort_index(by='a') + + # convert tuples to a list of tuples + expected = df.sort_index(by=[('a',1)]) + result = df.sort_index(by=('a',1)) + assert_frame_equal(result, expected) + def test_sort_datetimes(self): # GH 3461, argsort / lexsort differences for a datetime column