From b2ca2556937833d65e6e5016f1b12725a5b94d1d Mon Sep 17 00:00:00 2001 From: Daniel Sakuma Date: Sat, 10 Mar 2018 18:35:18 -0300 Subject: [PATCH 1/5] Improve docstring for pandas.Series.sort_index --- pandas/core/series.py | 116 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7b9b8a7a75008..98831763b1a83 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2016,10 +2016,124 @@ def _try_kind_sort(arr): else: return result.__finalize__(self) - @Appender(generic._shared_docs['sort_index'] % _shared_doc_kwargs) def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True): + """ + Sort object by labels. + + Returns a new Series sorted by label if `inplace` argument is `False`, + otherwise updates the original series and returns `null`. + + Parameters + ---------- + axis : int, default 0 + Axis to direct sorting. + level : int, default None + If not None, sort on values in specified index level(s). + ascending : boolean, default true + Sort ascending vs. descending. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' + Choice of sorting algorithm. See also ndarray.np.sort for more + information. `mergesort` is the only stable algorithm. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + If `first` puts NaNs at the beginning, `last` puts NaNs at the end. + Not implemented for MultiIndex. + sort_remaining : bool, default True + If true and sorting by level and index is multilevel, sort by other + levels too (in order) after sorting by specified level. 
+ Returns + ------- + sorted_obj : Series + + See Also + -------- + sort_values : Sort by the value + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3,2,1,4]) + >>> s.sort_index() + 1 c + 2 b + 3 a + 4 d + dtype: object + + Sort Descending + + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3,2,1,4]) + >>> s.sort_index(ascending=False) + 4 d + 3 a + 2 b + 1 c + dtype: object + + Sort Inplace + + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3,2,1,4]) + >>> s.sort_index(inplace=True) + >>> s + 1 c + 2 b + 3 a + 4 d + dtype: object + + Sort placing NaNs at first + + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3,2,1,np.nan]) + >>> s.sort_index(na_position='first') + NaN d + 1.0 c + 2.0 b + 3.0 a + dtype: object + + Specify index level to sort + + >>> import numpy as np + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',\ + 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one',\ + 'two', 'one', 'two', 'one'])] + >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> s.sort_index(level=1) + bar one 8 + baz one 6 + foo one 4 + qux one 2 + bar two 7 + baz two 5 + foo two 3 + qux two 1 + dtype: int64 + + Does not sort by remaining levels when sorting by levels + + >>> import numpy as np + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',\ + 'baz', 'baz', 'bar', 'bar']), + ... 
np.array(['two', 'one', 'two', 'one',\ + 'two', 'one', 'two', 'one'])] + >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> s.sort_index(level=1, sort_remaining=False) + qux one 2 + foo one 4 + baz one 6 + bar one 8 + qux two 1 + foo two 3 + baz two 5 + bar two 7 + dtype: int64 + """ + # TODO: this can be combined with DataFrame.sort_index impl as # almost identical inplace = validate_bool_kwarg(inplace, 'inplace') From 0bc7869d96fb75a94a831b039503dd610a67eeac Mon Sep 17 00:00:00 2001 From: Daniel Sakuma Date: Sat, 10 Mar 2018 18:51:47 -0300 Subject: [PATCH 2/5] Fix style and add description to Return section --- pandas/core/series.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 98831763b1a83..fb87b91a1d563 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2048,7 +2048,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Returns ------- - sorted_obj : Series + pandas.Series + The original Series sorted by the labels See Also -------- @@ -2098,10 +2099,10 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Specify index level to sort >>> import numpy as np - >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',\ - 'baz', 'baz', 'bar', 'bar']), - ... np.array(['two', 'one', 'two', 'one',\ - 'two', 'one', 'two', 'one'])] + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', + ... 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one', + ... 'two', 'one', 'two', 'one'])] >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) >>> s.sort_index(level=1) bar one 8 @@ -2117,10 +2118,10 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Does not sort by remaining levels when sorting by levels >>> import numpy as np - >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',\ - 'baz', 'baz', 'bar', 'bar']), - ... 
np.array(['two', 'one', 'two', 'one',\ + 'two', 'one', 'two', 'one'])] + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', + ... 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one', + ... 'two', 'one', 'two', 'one'])] >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) >>> s.sort_index(level=1, sort_remaining=False) qux one 2 @@ -2133,7 +2134,6 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, bar two 7 dtype: int64 """ - # TODO: this can be combined with DataFrame.sort_index impl as # almost identical inplace = validate_bool_kwarg(inplace, 'inplace') From 5e061fbb1f68209bcd2a9ae53aff6123474b7625 Mon Sep 17 00:00:00 2001 From: Daniel Sakuma Date: Mon, 12 Mar 2018 13:18:26 -0300 Subject: [PATCH 3/5] Change docstring to be more specific for Series and fix style --- pandas/core/series.py | 48 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index fb87b91a1d563..24de7313c6021 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2019,7 +2019,7 @@ def _try_kind_sort(arr): def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True): """ - Sort object by labels. + Sort Series by index labels. Returns a new Series sorted by label if `inplace` argument is `False`, otherwise updates the original series and returns `null`. @@ -2027,20 +2027,20 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Parameters ---------- axis : int, default 0 - Axis to direct sorting. - level : int, default None + Axis to direct sorting. This can only be 0 for Series. + level : int, optional If not None, sort on values in specified index level(s). - ascending : boolean, default true + ascending : bool, default True Sort ascending vs. descending. inplace : bool, default False If True, perform operation in-place.
kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' - Choice of sorting algorithm. See also ndarray.np.sort for more - information. `mergesort` is the only stable algorithm. For + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. 'mergesort' is the only stable algorithm. For DataFrames, this option is only applied when sorting on a single column or label. na_position : {'first', 'last'}, default 'last' - If `first` puts NaNs at the beginning, `last` puts NaNs at the end. + If `first` puts NaNs at the beginning, 'last' puts NaNs at the end. Not implemented for MultiIndex. sort_remaining : bool, default True If true and sorting by level and index is multilevel, sort by other @@ -2053,11 +2053,13 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, See Also -------- - sort_values : Sort by the value + DataFrame.sort_index : Sort DataFrame by the index + DataFrame.sort_values : Sort DataFrame by the value + Series.sort_values : Sort Series by the value Examples -------- - >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3,2,1,4]) + >>> s = pd.Series(['a','b','c','d'], index=[3,2,1,4]) >>> s.sort_index() 1 c 2 b @@ -2067,7 +2069,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Sort Descending - >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3,2,1,4]) + >>> s = pd.Series(['a','b','c','d'], index=[3,2,1,4]) >>> s.sort_index(ascending=False) 4 d 3 a @@ -2077,7 +2079,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Sort Inplace - >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3,2,1,4]) + >>> s = pd.Series(['a','b','c','d'], index=[3,2,1,4]) >>> s.sort_index(inplace=True) >>> s 1 c @@ -2088,7 +2090,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Sort placing NaNs at first - >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3,2,1,np.nan]) + >>> s = pd.Series(['a','b','c','d'], index=[3,2,1,np.nan]) >>>
s.sort_index(na_position='first') NaN d 1.0 c @@ -2098,12 +2100,11 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Specify index level to sort - >>> import numpy as np - >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', - ... 'baz', 'baz', 'bar', 'bar']), - ... np.array(['two', 'one', 'two', 'one', - ... 'two', 'one', 'two', 'one'])] - >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> arrays = [np.array(['qux','qux','foo','foo', + ... 'baz','baz','bar','bar']), + ... np.array(['two','one','two','one', + ... 'two','one','two','one'])] + >>> s = pd.Series([1,2,3,4,5,6,7,8], index=arrays) >>> s.sort_index(level=1) bar one 8 baz one 6 @@ -2117,12 +2118,11 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Does not sort by remaining levels when sorting by levels - >>> import numpy as np - >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', - ... 'baz', 'baz', 'bar', 'bar']), - ... np.array(['two', 'one', 'two', 'one', - ... 'two', 'one', 'two', 'one'])] - >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> arrays = [np.array(['qux','qux','foo','foo', + ... 'baz','baz','bar','bar']), + ... np.array(['two','one','two','one', + ... 'two','one','two','one'])] + >>> s = pd.Series([1,2,3,4,5,6,7,8], index=arrays) >>> s.sort_index(level=1, sort_remaining=False) qux one 2 foo one 4 From 74574212c854eb53c59a7fd1d17e6bb7db0fe13b Mon Sep 17 00:00:00 2001 From: Daniel Sakuma Date: Mon, 12 Mar 2018 13:52:05 -0300 Subject: [PATCH 4/5] Improve text and fix style --- pandas/core/series.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 24de7313c6021..8d5ecece254ef 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2040,7 +2040,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, DataFrames, this option is only applied when sorting on a single column or label. 
na_position : {'first', 'last'}, default 'last' - If `first` puts NaNs at the beginning, 'last' puts NaNs at the end. + If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. Not implemented for MultiIndex. sort_remaining : bool, default True If true and sorting by level and index is multilevel, sort by other @@ -2059,7 +2059,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Examples -------- - >>> s = pd.Series(['a','b','c','d'], index=[3,2,1,4]) + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4]) >>> s.sort_index() 1 c 2 b @@ -2069,7 +2069,6 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Sort Descending - >>> s = pd.Series(['a','b','c','d'], index=[3,2,1,4]) >>> s.sort_index(ascending=False) 4 d 3 a @@ -2079,7 +2078,6 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Sort Inplace - >>> s = pd.Series(['a','b','c','d'], index=[3,2,1,4]) >>> s.sort_index(inplace=True) >>> s 1 c @@ -2088,9 +2086,10 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, 4 d dtype: object - Sort placing NaNs at first + By default NaNs are put at the end, but use `na_position` to place + them at the beginning - >>> s = pd.Series(['a','b','c','d'], index=[3,2,1,np.nan]) + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan]) >>> s.sort_index(na_position='first') NaN d 1.0 c @@ -2100,11 +2099,11 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Specify index level to sort - >>> arrays = [np.array(['qux','qux','foo','foo', - ... 'baz','baz','bar','bar']), - ... np.array(['two','one','two','one', - ... 'two','one','two','one'])] - >>> s = pd.Series([1,2,3,4,5,6,7,8], index=arrays) + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', + ... 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one', + ... 
'two', 'one', 'two', 'one'])] + >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) >>> s.sort_index(level=1) bar one 8 baz one 6 @@ -2118,11 +2117,6 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Does not sort by remaining levels when sorting by levels - >>> arrays = [np.array(['qux','qux','foo','foo', - ... 'baz','baz','bar','bar']), - ... np.array(['two','one','two','one', - ... 'two','one','two','one'])] - >>> s = pd.Series([1,2,3,4,5,6,7,8], index=arrays) >>> s.sort_index(level=1, sort_remaining=False) qux one 2 foo one 4 From 3a66142fcd3851b942b179bcd0fdc731315166fa Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 12 Mar 2018 17:59:32 +0100 Subject: [PATCH 5/5] Update series.py --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8d5ecece254ef..7800b02a3fbe2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2021,8 +2021,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, """ Sort Series by index labels. - Returns a new Series sorted by label if `inplace` argument is `False`, - otherwise updates the original series and returns `null`. + Returns a new Series sorted by label if `inplace` argument is + ``False``, otherwise updates the original series and returns None. Parameters ----------