From e689f0be6d7267afee341007093b39e0ef4adbd4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 17 Apr 2023 09:43:22 +0100 Subject: [PATCH 1/8] add __len__ and __getitem__ to Column --- .../dataframe_api/column_object.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 31b610b7..8e1f04ef 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -1,2 +1,24 @@ +from typing import NoReturn + class Column: - pass + def __len__(self) -> int: + """ + Return the number of rows. + """ + + def __getitem__(self, row) -> object: + """ + Get the element at row index `row`. + """ + + def __iter__(self) -> NoReturn: + """ + Iterate over elements. + + This is intentionally "poisoned" to discourage inefficient code patterns. + + Raises + ------ + NotImplementedError + """ + raise NotImplementedError("'__iter__' is intentionally not implemented.") From 1fa1030c83eae498a5200e741a349fa56393fbe1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 17 Apr 2023 18:05:37 +0100 Subject: [PATCH 2/8] remove trailing whitespace --- spec/API_specification/dataframe_api/dataframe_object.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index c133d79a..6a68eb09 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -160,6 +160,7 @@ def drop_column(self, label: str) -> DataFrame: Drop the specified column. Parameters + ---------- label : str From faaf5991f533e8ca9473d5cd205e68a122010b5c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 17 Apr 2023 18:06:13 +0100 Subject: [PATCH 3/8] Revert "remove trailing whitespace" This reverts commit 1fa1030c83eae498a5200e741a349fa56393fbe1. --- spec/API_specification/dataframe_api/dataframe_object.py | 1 - 1 file changed, 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 6a68eb09..c133d79a 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -160,7 +160,6 @@ def drop_column(self, label: str) -> DataFrame: Drop the specified column. Parameters - ---------- label : str From 8b2413b9b41c8fd4f555da539c83b0a8a38c7878 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 18 Apr 2023 08:56:46 +0100 Subject: [PATCH 4/8] type getitems row --- spec/API_specification/dataframe_api/column_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 8e1f04ef..0e7e6181 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -6,7 +6,7 @@ def __len__(self) -> int: Return the number of rows. """ - def __getitem__(self, row) -> object: + def __getitem__(self, row: int) -> object: """ Get the element at row index `row`. """ From 27a1ecebcd2dad13c56a035d84981b91c49bc128 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 19 Apr 2023 11:09:45 +0100 Subject: [PATCH 5/8] let __getitem__ take sequence of int --- .../dataframe_api/column_object.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 0e7e6181..8df81dd7 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -1,4 +1,6 @@ -from typing import NoReturn +from __future__ import annotations + +from typing import NoReturn, overload, Sequence class Column: def __len__(self) -> int: @@ -6,9 +8,17 @@ def __len__(self) -> int: Return the number of rows. """ - def __getitem__(self, row: int) -> object: + @overload + def __getitem__(self, key: int) -> object: + ... + + @overload + def __getitem__(self, key: Sequence[int]) -> Column: + ... + + def __getitem__(self, key: int | Sequence[int]) -> object | Column: """ - Get the element at row index `row`. + Get the element at row index `key`. """ def __iter__(self) -> NoReturn: From 73e798724d1529cbb6eac540a0397b2796cb15f1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 19 Apr 2023 11:38:13 +0100 Subject: [PATCH 6/8] Revert "let __getitem__ take sequence of int" This reverts commit 27a1ecebcd2dad13c56a035d84981b91c49bc128. --- .../dataframe_api/column_object.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 8df81dd7..0e7e6181 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -1,6 +1,4 @@ -from __future__ import annotations - -from typing import NoReturn, overload, Sequence +from typing import NoReturn class Column: def __len__(self) -> int: @@ -8,17 +6,9 @@ def __len__(self) -> int: Return the number of rows. """ - @overload - def __getitem__(self, key: int) -> object: - ... - - @overload - def __getitem__(self, key: Sequence[int]) -> Column: - ... - - def __getitem__(self, key: int | Sequence[int]) -> object | Column: + def __getitem__(self, row: int) -> object: """ - Get the element at row index `key`. + Get the element at row index `row`. """ def __iter__(self) -> NoReturn: From ce443f8f8053349eead2774f99cb6d7c2adb4db4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 19 Apr 2023 11:39:58 +0100 Subject: [PATCH 7/8] add get_rows --- .../dataframe_api/column_object.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 0e7e6181..1095287e 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import NoReturn class Column: @@ -8,7 +10,7 @@ def __len__(self) -> int: def __getitem__(self, row: int) -> object: """ - Get the element at row index `row`. + Get the element at row index `key`. """ def __iter__(self) -> NoReturn: @@ -22,3 +24,18 @@ def __iter__(self) -> NoReturn: NotImplementedError """ raise NotImplementedError("'__iter__' is intentionally not implemented.") + + def get_rows(self, indices: Column[int]) -> Column: + """ + Select a subset of rows, similar to `ndarray.take`. + + Parameters + ---------- + indices : Column[int] + Positions of rows to select. + + Returns + ------- + Column + """ + ... \ No newline at end of file From 995ec3a416f9b82e9ed54fa0242f77f125a77227 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 28 Apr 2023 16:18:51 +0100 Subject: [PATCH 8/8] doc fixup --- spec/API_specification/dataframe_api/column_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index e804c6e9..c98b422f 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -76,7 +76,7 @@ def get_value(self, row_number: int) -> dtype: Returns ------- - Scalar + dtype Depends on the dtype of the Column, and may vary across implementations. """