From 9a15512b1c6e349b4b8246d20b2d3450191b934e Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 14 Mar 2014 17:34:18 -0400 Subject: [PATCH 1/2] API: A tuple passed to DataFrame.sort_index will be interpreted as the levels of the index, rather than requiring a list of tuple (GH4370) --- doc/source/basics.rst | 15 ++++++++++++--- doc/source/release.rst | 3 +++ pandas/core/frame.py | 14 +++++++++++--- pandas/tests/test_frame.py | 11 +++++++++++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index c2642501791e7..159cd05194300 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1287,9 +1287,18 @@ Some other sorting notes / nuances: * ``Series.sort`` sorts a Series by value in-place. This is to provide compatibility with NumPy methods which expect the ``ndarray.sort`` behavior. - * ``DataFrame.sort`` takes a ``column`` argument instead of ``by``. This - method will likely be deprecated in a future release in favor of just using - ``sort_index``. + +Sorting by a multi-index column +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You must be explicit about sorting when the column is a multi-index, and fully specify +all levels to ``by``. + +.. ipython:: python + + df1.columns = MultiIndex.from_tuples([('a','one'),('a','two'),('b','three')]) + df1.sort_index(by=('a','two')) + Copying ------- diff --git a/doc/source/release.rst b/doc/source/release.rst index 1dbce9f98cafa..d402a6ce6f131 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -125,6 +125,9 @@ API Changes ``DataFrame.stack`` operations where the name of the column index is used as the name of the inserted column containing the pivoted data. 
+- A tuple passed to ``DataFrame.sort_index`` will be interpreted as the levels of + the index, rather than requiring a list of tuple (:issue:`4370`) + Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8c5243461b7b9..00582d828394f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2574,8 +2574,9 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False, axis : {0, 1} Sort index/rows versus columns by : object - Column name(s) in frame. Accepts a column name or a list or tuple - for a nested sort. + Column name(s) in frame. Accepts a column name or a list + for a nested sort. A tuple will be interpreted as the + levels of a multi-index. ascending : boolean or list, default True Sort ascending vs. descending. Specify list for multiple sort orders @@ -2602,7 +2603,7 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False, if axis != 0: raise ValueError('When sorting by column, axis must be 0 ' '(rows)') - if not isinstance(by, (tuple, list)): + if not isinstance(by, list): by = [by] if com._is_sequence(ascending) and len(by) != len(ascending): raise ValueError('Length of ascending (%d) != length of by' @@ -2629,6 +2630,13 @@ def trans(v): by = by[0] k = self[by].values if k.ndim == 2: + + # try to be helpful + if isinstance(self.columns, MultiIndex): + raise ValueError('Cannot sort by column %s in a multi-index' + ' you need to explicity provide all the levels' + % str(by)) + raise ValueError('Cannot sort by duplicate column %s' % str(by)) if isinstance(ascending, (tuple, list)): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ed88a355cf7a9..8a4207da76c52 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -9797,6 +9797,17 @@ def test_sort_index_duplicates(self): # multi-column 'by' is separate codepath df.sort_index(by=['a', 'b']) + # with multi-index + # GH4370 + df = 
DataFrame(np.random.randn(4,2),columns=MultiIndex.from_tuples([('a',0),('a',1)])) + with assertRaisesRegexp(ValueError, 'levels'): + df.sort_index(by='a') + + # convert tuples to a list of tuples + expected = df.sort_index(by=[('a',1)]) + result = df.sort_index(by=('a',1)) + assert_frame_equal(result, expected) + def test_sort_datetimes(self): # GH 3461, argsort / lexsort differences for a datetime column From 7bb9174e9af6be9e06caca22acde12aae0b4d895 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 14 Mar 2014 18:44:55 -0400 Subject: [PATCH 2/2] DEPR: remove 'column' kw from DataFrame.sort (prior deprecated circa 2012) --- doc/source/release.rst | 8 ++++++++ doc/source/v0.14.0.txt | 4 +++- pandas/core/frame.py | 10 ++++------ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index d402a6ce6f131..257d17bbe321b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -128,6 +128,14 @@ API Changes - A tuple passed to ``DataFrame.sort_index`` will be interpreted as the levels of the index, rather than requiring a list of tuple (:issue:`4370`) +Deprecations +~~~~~~~~~~~~ + +Prior Version Deprecations/Changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Remove ``column`` keyword from ``DataFrame.sort`` (:issue:`4370`) + Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 932ed4e1672b7..37e7f5b32ea84 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -276,7 +276,9 @@ You can use a right-hand-side of an alignable object as well. Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -There are no announced changes in 0.13.1 or prior that are taking effect as of 0.14.0 +These are prior version deprecations that are taking effect as of 0.14.0. 
+ +- Remove ``column`` keyword from ``DataFrame.sort`` (:issue:`4370`) Deprecations ~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 00582d828394f..8a72b0d7c4493 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2530,7 +2530,7 @@ def _m8_to_i8(x): #---------------------------------------------------------------------- # Sorting - def sort(self, columns=None, column=None, axis=0, ascending=True, + def sort(self, columns=None, axis=0, ascending=True, inplace=False): """ Sort DataFrame either by labels (along either axis) or by the values in @@ -2539,8 +2539,9 @@ def sort(self, columns=None, column=None, axis=0, ascending=True, Parameters ---------- columns : object - Column name(s) in frame. Accepts a column name or a list or tuple - for a nested sort. + Column name(s) in frame. Accepts a column name or a list + for a nested sort. A tuple will be interpreted as the + levels of a multi-index. ascending : boolean or list, default True Sort ascending vs. descending. Specify list for multiple sort orders @@ -2557,9 +2558,6 @@ def sort(self, columns=None, column=None, axis=0, ascending=True, ------- sorted : DataFrame """ - if column is not None: # pragma: no cover - warnings.warn("column is deprecated, use columns", FutureWarning) - columns = column return self.sort_index(by=columns, axis=axis, ascending=ascending, inplace=inplace)