From 226deab2e8bb0df2bde14b681dc64f3946fd261e Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Mon, 14 Sep 2020 15:08:39 +0200 Subject: [PATCH 1/5] Added example for natural sort --- environment.yml | 1 + pandas/core/generic.py | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/environment.yml b/environment.yml index 4622aac1dc6f8..b69345ee25f16 100644 --- a/environment.yml +++ b/environment.yml @@ -106,6 +106,7 @@ dependencies: - cftime # Needed for downstream xarray.CFTimeIndex test - pyreadstat # pandas.read_spss - tabulate>=0.8.3 # DataFrame.to_markdown + - natsort>=7.0.1 # DataFrame.sort_values - pip: - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master - git+https://github.com/numpy/numpydoc diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d78fa42cd1056..fadc2fb216c69 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4411,6 +4411,32 @@ def sort_values( 3 NaN 8 4 D 4 D 7 2 e 5 C 4 3 F + + Natural sort with the key function, + using the `natsort <https://github.com/SethMMorton/natsort>` package. + + >>> df = pd.DataFrame({ + ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], + ... "value": [10, 20, 30, 40, 50] + ...}) + >>> df + time value + 0 0hr 10 + 1 128hr 20 + 2 72hr 30 + 3 48hr 40 + 4 96hr 50 + >>> from natsort import index_natsorted + >>> df.sort_values( + ... by="time", + ... key=lambda x: np.argsort(index_natsorted(df["time"])) + ...) 
+ time value + 0 0hr 10 + 3 48hr 40 + 2 72hr 30 + 4 96hr 50 + 1 128hr 20 """ raise AbstractMethodError(self) From 41c8b9f22d9e926b5d894200ceaff8e80c3b5bba Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Mon, 14 Sep 2020 15:11:43 +0200 Subject: [PATCH 2/5] Argument instead of function --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d327e291ae4a9..cf93999644b8b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4407,7 +4407,7 @@ def sort_values( 4 D 7 2 e 5 C 4 3 F - Natural sort with the key function, + Natural sort with the key argument, using the `natsort <https://github.com/SethMMorton/natsort>` package. >>> df = pd.DataFrame({ From 3b56388fe41fca2b63706ff7fbc927a1541e00b2 Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Mon, 14 Sep 2020 15:55:15 +0200 Subject: [PATCH 3/5] Built requirements from conda --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index cc3775de3a4ba..76ea6f695c6f8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -73,6 +73,7 @@ xarray cftime pyreadstat tabulate>=0.8.3 +natsort>=7.0.1 git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master git+https://github.com/numpy/numpydoc pyflakes>=2.2.0 \ No newline at end of file From 245580d4c7c62461343d1604f77cef109397cd2f Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Mon, 14 Sep 2020 15:55:45 +0200 Subject: [PATCH 4/5] added whitespaces docstring --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cf93999644b8b..b0e979331209c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4413,7 +4413,7 @@ def sort_values( >>> df = pd.DataFrame({ ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], ... "value": [10, 20, 30, 40, 50] - ...}) + ... 
}) >>> df time value 0 0hr 10 @@ -4425,7 +4425,7 @@ def sort_values( >>> df.sort_values( ... by="time", ... key=lambda x: np.argsort(index_natsorted(df["time"])) - ...) + ... ) time value 0 0hr 10 3 48hr 40 From f96a16f5673c04b8923e7a2ce4528e571866f2f4 Mon Sep 17 00:00:00 2001 From: Erfan Nariman Date: Tue, 15 Sep 2020 13:47:52 +0200 Subject: [PATCH 5/5] removed specific version --- environment.yml | 2 +- requirements-dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index b69345ee25f16..badb0ba94a670 100644 --- a/environment.yml +++ b/environment.yml @@ -106,7 +106,7 @@ dependencies: - cftime # Needed for downstream xarray.CFTimeIndex test - pyreadstat # pandas.read_spss - tabulate>=0.8.3 # DataFrame.to_markdown - - natsort>=7.0.1 # DataFrame.sort_values + - natsort # DataFrame.sort_values - pip: - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master - git+https://github.com/numpy/numpydoc diff --git a/requirements-dev.txt b/requirements-dev.txt index 76ea6f695c6f8..c53ced35d27fa 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -73,7 +73,7 @@ xarray cftime pyreadstat tabulate>=0.8.3 -natsort>=7.0.1 +natsort git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master git+https://github.com/numpy/numpydoc pyflakes>=2.2.0 \ No newline at end of file