From e3adbd8664f02983b72f02625300fa51bc184ff3 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 17 Apr 2023 18:39:55 +0100 Subject: [PATCH 1/9] add some missing Column methods --- .../dataframe_api/column_object.py | 333 +++++++++++++++++- 1 file changed, 332 insertions(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 31b610b7..008b7c3e 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -1,2 +1,333 @@ +from __future__ import annotations + +from ._types import Scalar + class Column: - pass + def __eq__(self, other: Column | Scalar) -> Column: + """ + Compare for equality. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __ne__(self, other: Column | Scalar) -> Column: + """ + Compare for non-equality. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __ge__(self, other: Column | Scalar) -> Column: + """ + Compare for "greater than or equal to" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __gt__(self, other: Column | Scalar) -> Column: + """ + Compare for "greater than" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length.
+ "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __le__(self, other: Column | Scalar) -> Column: + """ + Compare for "less than or equal to" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __lt__(self, other: Column | Scalar) -> Column: + """ + Compare for "less than" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __and__(self, other: Column | Scalar) -> Column: + """ + Add `other` column or scalar to this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __sub__(self, other: Column | Scalar) -> Column: + """ + Subtract `other` column or scalar from this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __mul__(self, other: Column | Scalar) -> Column: + """ + Multiply `other` column or scalar with this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes.
+ + Returns + ------- + Column + """ + + def __truediv__(self, other: Column | Scalar) -> Column: + """ + Divide this column by `other` column or scalar. True division, returns floats. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __floordiv__(self, other: Column | Scalar) -> Column: + """ + Floor-divide this column by `other` column or scalar. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __pow__(self, other: Column | Scalar) -> Column: + """ + Raise this column to the power of `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __mod__(self, other: Column | Scalar) -> Column: + """ + Returns modulus of this column by `other` (`%` operator). + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes. + + Returns + ------- + Column + """ + + def __divmod__(self, other: Column | Scalar) -> tuple[Column, Column]: + """ + Return quotient and remainder of integer division. See `divmod` builtin function. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underlying dtypes.
+ + Returns + ------- + tuple[Column, Column] + """ + + def __invert__(self) -> Column: + """ + Invert truthiness of (boolean) elements. + + Raises + ------ + ValueError + If the Column is not boolean. + """ + + def any(self, skip_nulls: bool = True) -> bool: + """ + Reduction returns a bool. + + Raises + ------ + ValueError + If column is not boolean. + """ + + def all(self, skip_nulls: bool = True) -> bool: + """ + Reduction returns a bool. + + Raises + ------ + ValueError + If column is not boolean. + """ + + def min(self, skip_nulls: bool = True) -> float: + """ + Reduction returns a float. + """ + + def max(self, skip_nulls: bool = True) -> float: + """ + Reduction returns a float. + """ + + def sum(self, skipna: bool = True) -> float: + """ + Reduction returns a float. + """ + + def prod(self, skipna: bool = True) -> float: + """ + Reduction returns a float. + """ + + def median(self, skipna: bool = True) -> float: + """ + Reduction returns a float. + """ + + def mean(self, skipna: bool = True) -> float: + """ + Reduction returns a float. + """ + + def std(self, skipna: bool = True) -> float: + """ + Reduction returns a float. + """ + + def var(self, skipna: bool = True) -> float: + """ + Reduction returns a float. + """ + + def isnull(self) -> Column: + """ + Check for 'missing' or 'null' entries. + + Returns + ------- + Column + + See also + -------- + isnan + + Notes + ----- + Does *not* include NaN-like entries. + """ + + def isnan(self) -> Column: + """ + Check for nan-like entries. + + Returns + ------- + Column + + See also + -------- + isnull + + Notes + ----- + Includes anything with NaN-like semantics, e.g. np.datetime64("NaT"). + Does *not* include 'missing' or 'null' entries.
+ """ From 084b83c53ea13075aa79549eacb0d166139ee2c8 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 27 Apr 2023 11:45:50 +0100 Subject: [PATCH 2/9] update isnan to only be about nan --- spec/API_specification/dataframe_api/dataframe_object.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index b663459f..9f749d83 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -589,12 +589,14 @@ def isnull(self) -> DataFrame: Notes ----- Does *not* include NaN-like entries. + May optionally include 'NaT' values (if present in an implementation), + but note that the Standard makes no guarantees about them. """ ... def isnan(self) -> DataFrame: """ - Check for nan-like entries. + Check for nan entries. Returns ------- @@ -606,7 +608,8 @@ def isnan(self) -> DataFrame: Notes ----- - Includes anything with NaN-like semantics, e.g. np.datetime64("NaT"). + This only checks for 'NaN'. Does *not* include 'missing' or 'null' entries. + In particular, does not check for `np.timedelta64('NaT')`. """ ... 
From 34e4fc0207ee92803cdd27a91c15cd395953cc0d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 27 Apr 2023 11:54:57 +0100 Subject: [PATCH 3/9] fixup return types of reductions --- .../dataframe_api/column_object.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 008b7c3e..d1f29269 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -1,6 +1,6 @@ from __future__ import annotations -from ._types import Scalar +from ._types import Scalar, dtype class Column: def __eq__(self, other: Column | Scalar) -> Column: @@ -257,44 +257,44 @@ def all(self, skip_nulls: bool = True) -> bool: If column is not boolean. """ - def min(self, skip_nulls: bool = True) -> float: + def min(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a float. + Reduction returns a scalar. """ - def max(self, skip_nulls: bool = True) -> float: + def max(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a float. + Reduction returns a scalar. """ - def sum(self, skipna: bool = True) -> float: + def sum(self, skipna: bool = True) -> dtype: """ - Reduction returns a float. + Reduction returns a scalar. """ - def prod(self, skipna: bool = True) -> float: + def prod(self, skipna: bool = True) -> dtype: """ - Reduction returns a float. + Reduction returns a scalar. """ - def median(self, skipna: bool = True) -> float: + def median(self, skipna: bool = True) -> dtype: """ - Reduction returns a float. + Reduction returns a scalar. """ - def mean(self, skipna: bool = True) -> float: + def mean(self, skipna: bool = True) -> dtype: """ - Reduction returns a float. + Reduction returns a scalar. """ - def std(self, skipna: bool = True) -> float: + def std(self, skipna: bool = True) -> dtype: """ - Reduction returns a float. 
+ Reduction returns a scalar. """ - def var(self, skipna: bool = True) -> float: + def var(self, skipna: bool = True) -> dtype: """ - Reduction returns a float. + Reduction returns a scalar. """ def isnull(self) -> Column: From 5c53316f0c1ea93911940229ec99299c8050d048 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 27 Apr 2023 11:55:34 +0100 Subject: [PATCH 4/9] skipna -> skip_nulls --- .../API_specification/dataframe_api/column_object.py | 12 ++++++------ .../dataframe_api/dataframe_object.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index d1f29269..7f727f9d 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -267,32 +267,32 @@ def max(self, skip_nulls: bool = True) -> dtype: Reduction returns a scalar. """ - def sum(self, skipna: bool = True) -> dtype: + def sum(self, skip_nulls: bool = True) -> dtype: """ Reduction returns a scalar. """ - def prod(self, skipna: bool = True) -> dtype: + def prod(self, skip_nulls: bool = True) -> dtype: """ Reduction returns a scalar. """ - def median(self, skipna: bool = True) -> dtype: + def median(self, skip_nulls: bool = True) -> dtype: """ Reduction returns a scalar. """ - def mean(self, skipna: bool = True) -> dtype: + def mean(self, skip_nulls: bool = True) -> dtype: """ Reduction returns a scalar. """ - def std(self, skipna: bool = True) -> dtype: + def std(self, skip_nulls: bool = True) -> dtype: """ Reduction returns a scalar. """ - def var(self, skipna: bool = True) -> dtype: + def var(self, skip_nulls: bool = True) -> dtype: """ Reduction returns a scalar. 
""" diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 9f749d83..9ea21edc 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -498,7 +498,7 @@ def all(self, skip_nulls: bool = True) -> DataFrame: """ ... - def any_rowwise(self, skipna: bool = True) -> Column: + def any_rowwise(self, skip_nulls: bool = True) -> Column: """ Reduction returns a Column. @@ -512,7 +512,7 @@ def any_rowwise(self, skipna: bool = True) -> Column: """ ... - def all_rowwise(self, skipna: bool = True) -> Column: + def all_rowwise(self, skip_nulls: bool = True) -> Column: """ Reduction returns a Column. From 5ce1c6c060afc90959ad53c38fc517284fced02e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 27 Apr 2023 11:58:45 +0100 Subject: [PATCH 5/9] make fillnan changes only to the coolumn page --- spec/API_specification/dataframe_api/column_object.py | 7 +++++-- spec/API_specification/dataframe_api/dataframe_object.py | 7 ++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 7f727f9d..cbdb9ea9 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -312,11 +312,13 @@ def isnull(self) -> Column: Notes ----- Does *not* include NaN-like entries. + May optionally include 'NaT' values (if present in an implementation), + but note that the Standard makes no guarantees about them. """ def isnan(self) -> Column: """ - Check for nan-like entries. + Check for nan entries. Returns ------- @@ -328,6 +330,7 @@ def isnan(self) -> Column: Notes ----- - Includes anything with NaN-like semantics, e.g. np.datetime64("NaT"). + This only checks for 'NaN'. Does *not* include 'missing' or 'null' entries. 
+ In particular, does not check for `np.timedelta64('NaT')`. """ diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 9ea21edc..1c72a6ad 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -589,14 +589,12 @@ def isnull(self) -> DataFrame: Notes ----- Does *not* include NaN-like entries. - May optionally include 'NaT' values (if present in an implementation), - but note that the Standard makes no guarantees about them. """ ... def isnan(self) -> DataFrame: """ - Check for nan entries. + Check for nan-like entries. Returns ------- @@ -608,8 +606,7 @@ def isnan(self) -> DataFrame: Notes ----- - This only checks for 'NaN'. + Includes anything with NaN-like semantics, e.g. np.datetime64("NaT"). Does *not* include 'missing' or 'null' entries. - In particular, does not check for `np.timedelta64('NaT')`. """ ... From ce1dafc849e41f13894eeb7bb951542622ea86a2 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 12:32:51 +0100 Subject: [PATCH 6/9] Specify more precisely what dtypes are supported --- .../dataframe_api/column_object.py | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index cbdb9ea9..b696da62 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -259,42 +259,57 @@ def all(self, skip_nulls: bool = True) -> bool: def min(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. + Reduction returns a scalar. Any data type that supports comparisons + must be supported. The returned value has the same dtype as the column. """ def max(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. + Reduction returns a scalar. 
Any data type that supports comparisons + must be supported. The returned value has the same dtype as the column. """ def sum(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. + Reduction returns a scalar. Must be supported for numerical and + datetime data types. The returned value has the same dtype as the + column. """ def prod(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. + Reduction returns a scalar. Must be supported for numerical data types. + The returned value has the same dtype as the column. """ def median(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. + Reduction returns a scalar. Any data type that supports comparisons + must be supported. The returned value has the same dtype as the column. """ def mean(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. """ def std(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. """ def var(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. 
""" def isnull(self) -> Column: From 786827eb3ecd542d18788b41b2358d18893dba00 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 12:39:47 +0100 Subject: [PATCH 7/9] Fix Myst rendering issue --- spec/API_specification/dataframe_api/dataframe_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index f17b3f1d..29dcf5fa 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -265,7 +265,7 @@ def sorted_indices( If a sequence, it must be the same length as `keys`, and determines the direction with which to use each key to sort by. - nulls_position : {'first', 'last'} + nulls_position : ``{'first', 'last'}`` Whether null values should be placed at the beginning or at the end of the result. Note that the position of NaNs is unspecified and may From e385af813616c63ec372709352f81b4b39b47fe6 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 12:40:26 +0100 Subject: [PATCH 8/9] Bug: `median` behaves like `mean`, not like `min` --- spec/API_specification/dataframe_api/column_object.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 7275f839..c11da438 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -317,8 +317,10 @@ def prod(self, skip_nulls: bool = True) -> dtype: def median(self, skip_nulls: bool = True) -> dtype: """ - Reduction returns a scalar. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. + Reduction returns a scalar. Must be supported for numerical and + datetime data types. 
Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. """ def mean(self, skip_nulls: bool = True) -> dtype: From 972432716e73566f232a45567ec5af03d70f7671 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 12:42:53 +0100 Subject: [PATCH 9/9] Remove rendering of `__hash__` --- spec/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/conf.py b/spec/conf.py index fc0d1375..c44feb81 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -51,7 +51,7 @@ 'members': True, 'special-members': True, 'undoc-members': True, - 'exclude-members': '__annotations__, __dict__,__weakref__,__module__', + 'exclude-members': '__annotations__, __dict__,__weakref__,__module__,__hash__', } add_module_names = False napoleon_numpy_docstring = True