From 5061a5538a8fce6f2a3dbe9047ed93924a3418ec Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 3 Apr 2023 09:32:41 +0100 Subject: [PATCH 1/8] add sort --- .../dataframe_api/dataframe_object.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index c133d79a..31d1c218 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -214,6 +214,21 @@ def get_column_names(self) -> Sequence[str]: """ ... + def sort(self, keys: Sequence[str]) -> DataFrame: + """ + Sort rows according to given columns. + + Parameters + ---------- + keys : Sequence[str] + Names of columns to sort by. + + Returns + ------- + DataFrame + """ + ... + def __eq__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for equality. From 8218782acdae3c0ee42bbf7e9be39e6564a6b70c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 3 Apr 2023 11:41:32 +0100 Subject: [PATCH 2/8] add ascending arg, and nulls position --- .../dataframe_api/dataframe_object.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 31d1c218..ab971fdb 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Sequence, Union, TYPE_CHECKING, NoReturn +from typing import Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn if TYPE_CHECKING: from .column_object import Column @@ -214,7 +214,13 @@ def get_column_names(self) -> Sequence[str]: """ ... - def sort(self, keys: Sequence[str]) -> DataFrame: + def sort( + self, + keys: Sequence[str], + *, + ascending: Mapping[str, bool] | None = None, + nulls_position: Literal['first', 'last'] = 'last', + ) -> DataFrame: """ Sort rows according to given columns. @@ -222,6 +228,10 @@ def sort(self, keys: Sequence[str]) -> DataFrame: ---------- keys : Sequence[str] Names of columns to sort by. + ascending : Mapping[str, bool] + Direction with which to sort each column. If + not specified, then each column will be specified + in ascending order. Returns ------- From 02bf57fb7d92c6438c941718e64ad2f44569aaa4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 3 Apr 2023 14:07:43 +0100 Subject: [PATCH 3/8] ascending: sequence[bool] or bool --- .../dataframe_api/dataframe_object.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index ab971fdb..46ea1183 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -218,7 +218,7 @@ def sort( self, keys: Sequence[str], *, - ascending: Mapping[str, bool] | None = None, + ascending: Sequence[bool] | bool = True, nulls_position: Literal['first', 'last'] = 'last', ) -> DataFrame: """ @@ -228,14 +228,24 @@ def sort( ---------- keys : Sequence[str] Names of columns to sort by. - ascending : Mapping[str, bool] - Direction with which to sort each column. If - not specified, then each column will be specified - in ascending order. + ascending : Sequence[bool] or bool + If `True`, sort by all keys in ascending order. + If `False`, sort by all keys in descending order. + If a sequence, it must be the same length as `keys`, + and determines the direction with which to use each + key to sort by. + nulls_position : {'first', 'last'} + Whether null values should be placed at the beginning + or at the end of the result. Returns ------- DataFrame + + Raises + ------ + ValueError + If `keys` and `ascending` are sequences of different lengths. """ ... From feb26d76c9acd04dff30e241aeb0d2c4ecb03070 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 12 Apr 2023 13:34:15 +0100 Subject: [PATCH 4/8] leave nan position unspecified --- spec/API_specification/dataframe_api/dataframe_object.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 46ea1183..56153eb4 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -237,6 +237,8 @@ def sort( nulls_position : {'first', 'last'} Whether null values should be placed at the beginning or at the end of the result. + Note that the position of NaNs is unspecified and may + vary based on the implementation. Returns ------- From 9cb27aa171f73db25c3ab839c2968750d4de5079 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 17 Apr 2023 08:34:45 +0100 Subject: [PATCH 5/8] sort => sorted_indices --- spec/API_specification/dataframe_api/dataframe_object.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 56153eb4..cc65c10e 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -214,7 +214,7 @@ def get_column_names(self) -> Sequence[str]: """ ... - def sort( + def sorted_indices( self, keys: Sequence[str], *, @@ -222,7 +222,11 @@ def sort( nulls_position: Literal['first', 'last'] = 'last', ) -> DataFrame: """ - Sort rows according to given columns. + Return row numbers which would sort according to given columns. + + If you need to sort the DataFrame, you can simply do:: + + df.get_rows(df.sorted_indices()) Parameters ---------- From efe660974cbf2a4733860be0b18e86bd0dc79752 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 17 Apr 2023 08:38:22 +0100 Subject: [PATCH 6/8] fixup return type --- spec/API_specification/dataframe_api/dataframe_object.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index cc65c10e..e24afa74 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -220,7 +220,7 @@ def sorted_indices( *, ascending: Sequence[bool] | bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> DataFrame: + ) -> Sequence[int]: """ Return row numbers which would sort according to given columns. @@ -246,7 +246,7 @@ def sorted_indices( Returns ------- - DataFrame + Sequence[int] Raises ------ From 89f3e4da7c5044691c9f79700c62e3266bf5dcda Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 18 Apr 2023 09:00:02 +0100 Subject: [PATCH 7/8] add missing keys --- spec/API_specification/dataframe_api/dataframe_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index e24afa74..529e965d 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -226,7 +226,7 @@ def sorted_indices( If you need to sort the DataFrame, you can simply do:: - df.get_rows(df.sorted_indices()) + df.get_rows(df.sorted_indices(keys)) Parameters ---------- From b8b4ebd510d67566cb6ce13805cf1132b34792cc Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 26 Apr 2023 15:45:17 +0100 Subject: [PATCH 8/8] update return type to column rather than sequence --- spec/API_specification/dataframe_api/dataframe_object.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 529e965d..22542e29 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -220,7 +220,7 @@ def sorted_indices( *, ascending: Sequence[bool] | bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Sequence[int]: + ) -> Column[int]: """ Return row numbers which would sort according to given columns. @@ -246,7 +246,7 @@ def sorted_indices( Returns ------- - Sequence[int] + Column[int] Raises ------