From fa889422d40c49d544d489000422d1030c2fc939 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 07:23:43 +0000 Subject: [PATCH 01/12] Update .rst docs, remove empty ones, add a `dataframe_object.rst` --- spec/API_specification/column_selection.md | 1 - spec/API_specification/dataframe_basics.md | 9 - spec/API_specification/dataframe_object.rst | 291 ++++++++++++++++++++ spec/API_specification/filter_rows.md | 1 - spec/API_specification/index.rst | 4 +- 5 files changed, 292 insertions(+), 14 deletions(-) delete mode 100644 spec/API_specification/column_selection.md delete mode 100644 spec/API_specification/dataframe_basics.md create mode 100644 spec/API_specification/dataframe_object.rst delete mode 100644 spec/API_specification/filter_rows.md diff --git a/spec/API_specification/column_selection.md b/spec/API_specification/column_selection.md deleted file mode 100644 index fd70e98e..00000000 --- a/spec/API_specification/column_selection.md +++ /dev/null @@ -1 +0,0 @@ -# Column selection diff --git a/spec/API_specification/dataframe_basics.md b/spec/API_specification/dataframe_basics.md deleted file mode 100644 index 668f564c..00000000 --- a/spec/API_specification/dataframe_basics.md +++ /dev/null @@ -1,9 +0,0 @@ -# Dataframe basics - -## Class name - - -## Dataframe size - - -## Columns names diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst new file mode 100644 index 00000000..20585f01 --- /dev/null +++ b/spec/API_specification/dataframe_object.rst @@ -0,0 +1,291 @@ +.. _array-object: + +Dataframe object +================ + +A conforming implementation of the dataframe API standard must provide and +support an array object having the following attributes and methods. + +------------------------------------------------- + +.. 
_operators: + +Operators +--------- + +A conforming implementation of the array API standard must provide and support an array object supporting the following Python operators. + +Arithmetic Operators +~~~~~~~~~~~~~~~~~~~~ + +A conforming implementation of the array API standard must provide and support an array object supporting the following Python arithmetic operators. + +- ``+x``: :meth:`.array.__pos__` + + - `operator.pos(x) `_ + - `operator.__pos__(x) `_ + +- `-x`: :meth:`.array.__neg__` + + - `operator.neg(x) `_ + - `operator.__neg__(x) `_ + +- `x1 + x2`: :meth:`.array.__add__` + + - `operator.add(x1, x2) `_ + - `operator.__add__(x1, x2) `_ + +- `x1 - x2`: :meth:`.array.__sub__` + + - `operator.sub(x1, x2) `_ + - `operator.__sub__(x1, x2) `_ + +- `x1 * x2`: :meth:`.array.__mul__` + + - `operator.mul(x1, x2) `_ + - `operator.__mul__(x1, x2) `_ + +- `x1 / x2`: :meth:`.array.__truediv__` + + - `operator.truediv(x1,x2) `_ + - `operator.__truediv__(x1, x2) `_ + +- `x1 // x2`: :meth:`.array.__floordiv__` + + - `operator.floordiv(x1, x2) `_ + - `operator.__floordiv__(x1, x2) `_ + +- `x1 % x2`: :meth:`.array.__mod__` + + - `operator.mod(x1, x2) `_ + - `operator.__mod__(x1, x2) `_ + +- `x1 ** x2`: :meth:`.array.__pow__` + + - `operator.pow(x1, x2) `_ + - `operator.__pow__(x1, x2) `_ + +Arithmetic operators should be defined for arrays having real-valued data types. + +Array Operators +~~~~~~~~~~~~~~~ + +A conforming implementation of the array API standard must provide and support an array object supporting the following Python array operators. + +- `x1 @ x2`: :meth:`.array.__matmul__` + + - `operator.matmul(x1, x2) `_ + - `operator.__matmul__(x1, x2) `_ + +The matmul ``@`` operator should be defined for arrays having real-valued data types. + +Bitwise Operators +~~~~~~~~~~~~~~~~~ + +A conforming implementation of the array API standard must provide and support an array object supporting the following Python bitwise operators. 
+ +- `~x`: :meth:`.array.__invert__` + + - `operator.inv(x) `_ + - `operator.invert(x) `_ + - `operator.__inv__(x) `_ + - `operator.__invert__(x) `_ + +- `x1 & x2`: :meth:`.array.__and__` + + - `operator.and(x1, x2) `_ + - `operator.__and__(x1, x2) `_ + +- `x1 | x2`: :meth:`.array.__or__` + + - `operator.or(x1, x2) `_ + - `operator.__or__(x1, x2) `_ + +- `x1 ^ x2`: :meth:`.array.__xor__` + + - `operator.xor(x1, x2) `_ + - `operator.__xor__(x1, x2) `_ + +- `x1 << x2`: :meth:`.array.__lshift__` + + - `operator.lshift(x1, x2) `_ + - `operator.__lshift__(x1, x2) `_ + +- `x1 >> x2`: :meth:`.array.__rshift__` + + - `operator.rshift(x1, x2) `_ + - `operator.__rshift__(x1, x2) `_ + +Bitwise operators should be defined for arrays having integer and boolean data types. + +Comparison Operators +~~~~~~~~~~~~~~~~~~~~ + +A conforming implementation of the array API standard must provide and support an array object supporting the following Python comparison operators. + +- `x1 < x2`: :meth:`.array.__lt__` + + - `operator.lt(x1, x2) `_ + - `operator.__lt__(x1, x2) `_ + +- `x1 <= x2`: :meth:`.array.__le__` + + - `operator.le(x1, x2) `_ + - `operator.__le__(x1, x2) `_ + +- `x1 > x2`: :meth:`.array.__gt__` + + - `operator.gt(x1, x2) `_ + - `operator.__gt__(x1, x2) `_ + +- `x1 >= x2`: :meth:`.array.__ge__` + + - `operator.ge(x1, x2) `_ + - `operator.__ge__(x1, x2) `_ + +- `x1 == x2`: :meth:`.array.__eq__` + + - `operator.eq(x1, x2) `_ + - `operator.__eq__(x1, x2) `_ + +- `x1 != x2`: :meth:`.array.__ne__` + + - `operator.ne(x1, x2) `_ + - `operator.__ne__(x1, x2) `_ + +Comparison operators should be defined for arrays having any data type. + +In-place Operators +~~~~~~~~~~~~~~~~~~ + +A conforming implementation of the array API standard must provide and support an array object supporting the following in-place Python operators. + +An in-place operation must not change the data type or shape of the in-place array as a result of :ref:`type-promotion` or :ref:`broadcasting`. 
+ +An in-place operation must have the same behavior (including special cases) as its respective binary (i.e., two operand, non-assignment) operation. For example, after in-place addition ``x1 += x2``, the modified array ``x1`` must always equal the result of the equivalent binary arithmetic operation ``x1 = x1 + x2``. + +.. note:: + In-place operators must be supported as discussed in :ref:`copyview-mutability`. + +Arithmetic Operators +"""""""""""""""""""" + +- ``+=``. May be implemented via ``__iadd__``. +- ``-=``. May be implemented via ``__isub__``. +- ``*=``. May be implemented via ``__imul__``. +- ``/=``. May be implemented via ``__itruediv__``. +- ``//=``. May be implemented via ``__ifloordiv__``. +- ``**=``. May be implemented via ``__ipow__``. +- ``%=``. May be implemented via ``__imod__``. + +Array Operators +""""""""""""""" + +- ``@=``. May be implemented via ``__imatmul__``. + +Bitwise Operators +""""""""""""""""" + +- ``&=``. May be implemented via ``__iand__``. +- ``|=``. May be implemented via ``__ior__``. +- ``^=``. May be implemented via ``__ixor__``. +- ``<<=``. May be implemented via ``__ilshift__``. +- ``>>=``. May be implemented via ``__irshift__``. + +Reflected Operators +~~~~~~~~~~~~~~~~~~~ + +A conforming implementation of the array API standard must provide and support an array object supporting the following reflected operators. + +The results of applying reflected operators must match their non-reflected equivalents. + +.. note:: + All operators for which ``array scalar`` is implemented must have an equivalent reflected operator implementation. 
+ +Arithmetic Operators +"""""""""""""""""""" + +- ``__radd__`` +- ``__rsub__`` +- ``__rmul__`` +- ``__rtruediv__`` +- ``__rfloordiv__`` +- ``__rpow__`` +- ``__rmod__`` + +Array Operators +""""""""""""""" + +- ``__rmatmul__`` + +Bitwise Operators +""""""""""""""""" + +- ``__rand__`` +- ``__ror__`` +- ``__rxor__`` +- ``__rlshift__`` +- ``__rrshift__`` + +------------------------------------------------- + +.. currentmodule:: dataframe_api + +Attributes +---------- +.. + NOTE: please keep the attributes in alphabetical order + + +.. autosummary:: + :toctree: generated + :template: property.rst + + dataframe.shape + +------------------------------------------------- + +Methods +------- +.. + NOTE: please keep the methods in alphabetical order + + +.. autosummary:: + :toctree: generated + :template: property.rst + + array.__abs__ + array.__add__ + array.__and__ + array.__array_namespace__ + array.__bool__ + array.__complex__ + array.__dlpack__ + array.__dlpack_device__ + array.__eq__ + array.__float__ + array.__floordiv__ + array.__ge__ + array.__getitem__ + array.__gt__ + array.__index__ + array.__int__ + array.__invert__ + array.__le__ + array.__lshift__ + array.__lt__ + array.__matmul__ + array.__mod__ + array.__mul__ + array.__ne__ + array.__neg__ + array.__or__ + array.__pos__ + array.__pow__ + array.__rshift__ + array.__setitem__ + array.__sub__ + array.__truediv__ + array.__xor__ + array.to_device diff --git a/spec/API_specification/filter_rows.md b/spec/API_specification/filter_rows.md deleted file mode 100644 index 4cd7cd01..00000000 --- a/spec/API_specification/filter_rows.md +++ /dev/null @@ -1 +0,0 @@ -# Filter rows diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index d8a9dddb..39ddcf9d 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -5,6 +5,4 @@ API specification :caption: API specification :maxdepth: 1 - dataframe_basics - column_selection - filter_rows + dataframe_object From 
f20a39dfba80bc98fc9854961067e06a90fa0d96 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 07:24:25 +0000 Subject: [PATCH 02/12] Fix type annotations so package is importable --- .../dataframe_api/dataframe_object.py | 61 +++++++++---------- .../dataframe_api/groupby_object.py | 20 +++--- 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index b409cf49..dccc08f5 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -1,10 +1,9 @@ __all__ = ["DataFrame"] -from typing import Sequence, TYPE_CHECKING +from typing import Sequence, Union, TYPE_CHECKING -if TYPE_CHECKING: - from .column_object import Column - from .groupby_object import GroupBy +from .column_object import Column +from .groupby_object import GroupBy class DataFrame: @@ -91,7 +90,7 @@ def slice_rows( """ ... - def get_rows_by_mask(self, mask: Column[bool]) -> "DataFrame": + def get_rows_by_mask(self, mask: "Column[bool]") -> "DataFrame": """ Select a subset of rows corresponding to a mask. @@ -158,7 +157,7 @@ def set_column(self, label: str, value: Column) -> "DataFrame": """ ... - def __eq__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __eq__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -173,7 +172,7 @@ def __eq__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __ne__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __ne__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -188,7 +187,7 @@ def __ne__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... 
- def __ge__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __ge__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -203,7 +202,7 @@ def __ge__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __gt__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __gt__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -218,7 +217,7 @@ def __gt__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __le__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __le__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -233,7 +232,7 @@ def __le__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __lt__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __lt__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -248,7 +247,7 @@ def __lt__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __add__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __add__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -263,7 +262,7 @@ def __add__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __sub__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __sub__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -278,7 +277,7 @@ def __sub__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __mul__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __mul__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -293,7 +292,7 @@ def __mul__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... 
- def __truediv__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __truediv__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -308,7 +307,7 @@ def __truediv__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __floordiv__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __floordiv__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -323,7 +322,7 @@ def __floordiv__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __pow__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __pow__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -338,7 +337,7 @@ def __pow__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __mod__(self, other: DataFrame | "Scalar") -> "DataFrame": + def __mod__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ Parameters ---------- @@ -353,7 +352,7 @@ def __mod__(self, other: DataFrame | "Scalar") -> "DataFrame": """ ... - def __divmod__(self, other: DataFrame | "Scalar") -> tuple["DataFrame", "DataFrame"]: + def __divmod__(self, other: Union["DataFrame", "Scalar"]) -> tuple["DataFrame", "DataFrame"]: """ Parameters ---------- @@ -369,67 +368,67 @@ def __divmod__(self, other: DataFrame | "Scalar") -> tuple["DataFrame", "DataFra """ ... - def any(self, skipna: bool = True) -> DataFrame: + def any(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def all(self, skipna: bool = True) -> DataFrame: + def all(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def min(self, skipna: bool = True) -> DataFrame: + def min(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def max(self, skipna: bool = True) -> DataFrame: + def max(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... 
- def sum(self, skipna: bool = True) -> DataFrame: + def sum(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def prod(self, skipna: bool = True) -> DataFrame: + def prod(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def median(self, skipna: bool = True) -> DataFrame: + def median(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def mean(self, skipna: bool = True) -> DataFrame: + def mean(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def std(self, skipna: bool = True) -> DataFrame: + def std(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def var(self, skipna: bool = True) -> DataFrame: + def var(self, skipna: bool = True) -> "DataFrame": """ Reduction returns a 1-row DataFrame. """ ... - def isnull(self) -> DataFrame: + def isnull(self) -> "DataFrame": """ Check for 'missing' or 'null' entries. @@ -447,7 +446,7 @@ def isnull(self) -> DataFrame: """ ... - def isnan(self) -> DataFrame: + def isnan(self) -> "DataFrame": """ Check for nan-like entries. diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index a597e677..a00cc9ec 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -5,32 +5,32 @@ class GroupBy: - def any(self, skipna: bool = True) -> DataFrame: + def any(self, skipna: bool = True) -> "DataFrame": ... - def all(self, skipna: bool = True) -> DataFrame: + def all(self, skipna: bool = True) -> "DataFrame": ... - def min(self, skipna: bool = True) -> DataFrame: + def min(self, skipna: bool = True) -> "DataFrame": ... - def max(self, skipna: bool = True) -> DataFrame: + def max(self, skipna: bool = True) -> "DataFrame": ... 
- def sum(self, skipna: bool = True) -> DataFrame: + def sum(self, skipna: bool = True) -> "DataFrame": ... - def prod(self, skipna: bool = True) -> DataFrame: + def prod(self, skipna: bool = True) -> "DataFrame": ... - def median(self, skipna: bool = True) -> DataFrame: + def median(self, skipna: bool = True) -> "DataFrame": ... - def mean(self, skipna: bool = True) -> DataFrame: + def mean(self, skipna: bool = True) -> "DataFrame": ... - def std(self, skipna: bool = True) -> DataFrame: + def std(self, skipna: bool = True) -> "DataFrame": ... - def var(self, skipna: bool = True) -> DataFrame: + def var(self, skipna: bool = True) -> "DataFrame": ... From 772c82f4d3929ec0f33fe895ab55f7d81f500531 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 07:32:03 +0000 Subject: [PATCH 03/12] Update `dataframe_object.rst` and use of autodoc --- spec/API_specification/dataframe_object.rst | 164 +++++++------------- spec/API_specification/index.rst | 2 + 2 files changed, 54 insertions(+), 112 deletions(-) diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst index 20585f01..a668570c 100644 --- a/spec/API_specification/dataframe_object.rst +++ b/spec/API_specification/dataframe_object.rst @@ -20,59 +20,59 @@ Arithmetic Operators A conforming implementation of the array API standard must provide and support an array object supporting the following Python arithmetic operators. 
-- ``+x``: :meth:`.array.__pos__` +- ``+x``: :meth:`.dataframe.__pos__` - `operator.pos(x) `_ - `operator.__pos__(x) `_ -- `-x`: :meth:`.array.__neg__` +- `-x`: :meth:`.dataframe.__neg__` - `operator.neg(x) `_ - `operator.__neg__(x) `_ -- `x1 + x2`: :meth:`.array.__add__` +- `x1 + x2`: :meth:`.dataframe.__add__` - `operator.add(x1, x2) `_ - `operator.__add__(x1, x2) `_ -- `x1 - x2`: :meth:`.array.__sub__` +- `x1 - x2`: :meth:`.dataframe.__sub__` - `operator.sub(x1, x2) `_ - `operator.__sub__(x1, x2) `_ -- `x1 * x2`: :meth:`.array.__mul__` +- `x1 * x2`: :meth:`.dataframe.__mul__` - `operator.mul(x1, x2) `_ - `operator.__mul__(x1, x2) `_ -- `x1 / x2`: :meth:`.array.__truediv__` +- `x1 / x2`: :meth:`.dataframe.__truediv__` - `operator.truediv(x1,x2) `_ - `operator.__truediv__(x1, x2) `_ -- `x1 // x2`: :meth:`.array.__floordiv__` +- `x1 // x2`: :meth:`.dataframe.__floordiv__` - `operator.floordiv(x1, x2) `_ - `operator.__floordiv__(x1, x2) `_ -- `x1 % x2`: :meth:`.array.__mod__` +- `x1 % x2`: :meth:`.dataframe.__mod__` - `operator.mod(x1, x2) `_ - `operator.__mod__(x1, x2) `_ -- `x1 ** x2`: :meth:`.array.__pow__` +- `x1 ** x2`: :meth:`.dataframe.__pow__` - `operator.pow(x1, x2) `_ - `operator.__pow__(x1, x2) `_ -Arithmetic operators should be defined for arrays having real-valued data types. +Arithmetic operators should be defined for dataframe having real-valued data types. Array Operators ~~~~~~~~~~~~~~~ A conforming implementation of the array API standard must provide and support an array object supporting the following Python array operators. -- `x1 @ x2`: :meth:`.array.__matmul__` +- `x1 @ x2`: :meth:`.dataframe.__matmul__` - `operator.matmul(x1, x2) `_ - `operator.__matmul__(x1, x2) `_ @@ -84,34 +84,34 @@ Bitwise Operators A conforming implementation of the array API standard must provide and support an array object supporting the following Python bitwise operators. 
-- `~x`: :meth:`.array.__invert__` +- `~x`: :meth:`.dataframe.__invert__` - `operator.inv(x) `_ - `operator.invert(x) `_ - `operator.__inv__(x) `_ - `operator.__invert__(x) `_ -- `x1 & x2`: :meth:`.array.__and__` +- `x1 & x2`: :meth:`.dataframe.__and__` - `operator.and(x1, x2) `_ - `operator.__and__(x1, x2) `_ -- `x1 | x2`: :meth:`.array.__or__` +- `x1 | x2`: :meth:`.dataframe.__or__` - `operator.or(x1, x2) `_ - `operator.__or__(x1, x2) `_ -- `x1 ^ x2`: :meth:`.array.__xor__` +- `x1 ^ x2`: :meth:`.dataframe.__xor__` - `operator.xor(x1, x2) `_ - `operator.__xor__(x1, x2) `_ -- `x1 << x2`: :meth:`.array.__lshift__` +- `x1 << x2`: :meth:`.dataframe.__lshift__` - `operator.lshift(x1, x2) `_ - `operator.__lshift__(x1, x2) `_ -- `x1 >> x2`: :meth:`.array.__rshift__` +- `x1 >> x2`: :meth:`.dataframe.__rshift__` - `operator.rshift(x1, x2) `_ - `operator.__rshift__(x1, x2) `_ @@ -121,86 +121,51 @@ Bitwise operators should be defined for arrays having integer and boolean data t Comparison Operators ~~~~~~~~~~~~~~~~~~~~ -A conforming implementation of the array API standard must provide and support an array object supporting the following Python comparison operators. +A conforming implementation of the dataframe API standard must provide and +support a dataframe object supporting the following Python comparison +operators. 
-- `x1 < x2`: :meth:`.array.__lt__` +- `x1 < x2`: :meth:`.dataframe.__lt__` - `operator.lt(x1, x2) `_ - `operator.__lt__(x1, x2) `_ -- `x1 <= x2`: :meth:`.array.__le__` +- `x1 <= x2`: :meth:`.dataframe.__le__` - `operator.le(x1, x2) `_ - `operator.__le__(x1, x2) `_ -- `x1 > x2`: :meth:`.array.__gt__` +- `x1 > x2`: :meth:`.dataframe.__gt__` - `operator.gt(x1, x2) `_ - `operator.__gt__(x1, x2) `_ -- `x1 >= x2`: :meth:`.array.__ge__` +- `x1 >= x2`: :meth:`.dataframe.__ge__` - `operator.ge(x1, x2) `_ - `operator.__ge__(x1, x2) `_ -- `x1 == x2`: :meth:`.array.__eq__` +- `x1 == x2`: :meth:`.dataframe.__eq__` - `operator.eq(x1, x2) `_ - `operator.__eq__(x1, x2) `_ -- `x1 != x2`: :meth:`.array.__ne__` +- `x1 != x2`: :meth:`.dataframe.__ne__` - `operator.ne(x1, x2) `_ - `operator.__ne__(x1, x2) `_ -Comparison operators should be defined for arrays having any data type. +Comparison operators should be defined for dataframes having any data type. In-place Operators ~~~~~~~~~~~~~~~~~~ -A conforming implementation of the array API standard must provide and support an array object supporting the following in-place Python operators. - -An in-place operation must not change the data type or shape of the in-place array as a result of :ref:`type-promotion` or :ref:`broadcasting`. - -An in-place operation must have the same behavior (including special cases) as its respective binary (i.e., two operand, non-assignment) operation. For example, after in-place addition ``x1 += x2``, the modified array ``x1`` must always equal the result of the equivalent binary arithmetic operation ``x1 = x1 + x2``. - -.. note:: - In-place operators must be supported as discussed in :ref:`copyview-mutability`. - -Arithmetic Operators -"""""""""""""""""""" - -- ``+=``. May be implemented via ``__iadd__``. -- ``-=``. May be implemented via ``__isub__``. -- ``*=``. May be implemented via ``__imul__``. -- ``/=``. May be implemented via ``__itruediv__``. -- ``//=``. May be implemented via ``__ifloordiv__``. 
-- ``**=``. May be implemented via ``__ipow__``. -- ``%=``. May be implemented via ``__imod__``. - -Array Operators -""""""""""""""" - -- ``@=``. May be implemented via ``__imatmul__``. - -Bitwise Operators -""""""""""""""""" - -- ``&=``. May be implemented via ``__iand__``. -- ``|=``. May be implemented via ``__ior__``. -- ``^=``. May be implemented via ``__ixor__``. -- ``<<=``. May be implemented via ``__ilshift__``. -- ``>>=``. May be implemented via ``__irshift__``. +TODO Reflected Operators ~~~~~~~~~~~~~~~~~~~ -A conforming implementation of the array API standard must provide and support an array object supporting the following reflected operators. - -The results of applying reflected operators must match their non-reflected equivalents. - -.. note:: - All operators for which ``array scalar`` is implemented must have an equivalent reflected operator implementation. +TODO Arithmetic Operators """""""""""""""""""" @@ -213,20 +178,6 @@ Arithmetic Operators - ``__rpow__`` - ``__rmod__`` -Array Operators -""""""""""""""" - -- ``__rmatmul__`` - -Bitwise Operators -""""""""""""""""" - -- ``__rand__`` -- ``__ror__`` -- ``__rxor__`` -- ``__rlshift__`` -- ``__rrshift__`` - ------------------------------------------------- .. 
currentmodule:: dataframe_api @@ -255,37 +206,26 @@ Methods :toctree: generated :template: property.rst - array.__abs__ - array.__add__ - array.__and__ - array.__array_namespace__ - array.__bool__ - array.__complex__ - array.__dlpack__ - array.__dlpack_device__ - array.__eq__ - array.__float__ - array.__floordiv__ - array.__ge__ - array.__getitem__ - array.__gt__ - array.__index__ - array.__int__ - array.__invert__ - array.__le__ - array.__lshift__ - array.__lt__ - array.__matmul__ - array.__mod__ - array.__mul__ - array.__ne__ - array.__neg__ - array.__or__ - array.__pos__ - array.__pow__ - array.__rshift__ - array.__setitem__ - array.__sub__ - array.__truediv__ - array.__xor__ - array.to_device + dataframe.__abs__ + dataframe.__add__ + dataframe.__dataframe_namespace__ + dataframe.__complex__ + dataframe.__eq__ + dataframe.__float__ + dataframe.__floordiv__ + dataframe.__ge__ + dataframe.__getitem__ + dataframe.__gt__ + dataframe.__int__ + dataframe.__le__ + dataframe.__lt__ + dataframe.__mod__ + dataframe.__mul__ + dataframe.__ne__ + dataframe.__neg__ + dataframe.__or__ + dataframe.__pos__ + dataframe.__pow__ + dataframe.__setitem__ + dataframe.__sub__ + dataframe.__truediv__ diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index 39ddcf9d..eebf648e 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -1,6 +1,8 @@ API specification ================= +.. currentmodule:: dataframe_api + .. 
toctree:: :caption: API specification :maxdepth: 1 From b91b6b861e32121e4629a652c3b9936a332f47d5 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 07:43:28 +0000 Subject: [PATCH 04/12] A bunch more edits --- .../dataframe_api/__init__.py | 5 + spec/API_specification/dataframe_object.rst | 138 ++++++------------ 2 files changed, 50 insertions(+), 93 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 45eefa77..94462b17 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -2,6 +2,11 @@ Function stubs and API documentation for the DataFrame API standard. """ +from .column_object import * +from .dataframe_object import * +from .groupby_object import * + + __dataframe_api_version__: str = "YYYY.MM" """ String representing the version of the DataFrame API specification to which the diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst index a668570c..c8f9a3d7 100644 --- a/spec/API_specification/dataframe_object.rst +++ b/spec/API_specification/dataframe_object.rst @@ -4,7 +4,7 @@ Dataframe object ================ A conforming implementation of the dataframe API standard must provide and -support an array object having the following attributes and methods. +support a dataframe object having the following attributes and methods. ------------------------------------------------- @@ -13,110 +13,62 @@ support an array object having the following attributes and methods. Operators --------- -A conforming implementation of the array API standard must provide and support an array object supporting the following Python operators. +A conforming implementation of the dataframe API standard must provide and +support a dataframe object supporting the following Python operators. 
Arithmetic Operators ~~~~~~~~~~~~~~~~~~~~ -A conforming implementation of the array API standard must provide and support an array object supporting the following Python arithmetic operators. +A conforming implementation of the array API standard must provide and support +an array object supporting the following Python arithmetic operators. -- ``+x``: :meth:`.dataframe.__pos__` +- ``+x``: :meth:`.DataFrame.__pos__` - `operator.pos(x) `_ - `operator.__pos__(x) `_ -- `-x`: :meth:`.dataframe.__neg__` +- `-x`: :meth:`.DataFrame.__neg__` - `operator.neg(x) `_ - `operator.__neg__(x) `_ -- `x1 + x2`: :meth:`.dataframe.__add__` +- `x1 + x2`: :meth:`.DataFrame.__add__` - `operator.add(x1, x2) `_ - `operator.__add__(x1, x2) `_ -- `x1 - x2`: :meth:`.dataframe.__sub__` +- `x1 - x2`: :meth:`.DataFrame.__sub__` - `operator.sub(x1, x2) `_ - `operator.__sub__(x1, x2) `_ -- `x1 * x2`: :meth:`.dataframe.__mul__` +- `x1 * x2`: :meth:`.DataFrame.__mul__` - `operator.mul(x1, x2) `_ - `operator.__mul__(x1, x2) `_ -- `x1 / x2`: :meth:`.dataframe.__truediv__` +- `x1 / x2`: :meth:`.DataFrame.__truediv__` - `operator.truediv(x1,x2) `_ - `operator.__truediv__(x1, x2) `_ -- `x1 // x2`: :meth:`.dataframe.__floordiv__` +- `x1 // x2`: :meth:`.DataFrame.__floordiv__` - `operator.floordiv(x1, x2) `_ - `operator.__floordiv__(x1, x2) `_ -- `x1 % x2`: :meth:`.dataframe.__mod__` +- `x1 % x2`: :meth:`.DataFrame.__mod__` - `operator.mod(x1, x2) `_ - `operator.__mod__(x1, x2) `_ -- `x1 ** x2`: :meth:`.dataframe.__pow__` +- `x1 ** x2`: :meth:`.DataFrame.__pow__` - `operator.pow(x1, x2) `_ - `operator.__pow__(x1, x2) `_ -Arithmetic operators should be defined for dataframe having real-valued data types. - -Array Operators -~~~~~~~~~~~~~~~ - -A conforming implementation of the array API standard must provide and support an array object supporting the following Python array operators. 
- -- `x1 @ x2`: :meth:`.dataframe.__matmul__` - - - `operator.matmul(x1, x2) `_ - - `operator.__matmul__(x1, x2) `_ - -The matmul ``@`` operator should be defined for arrays having real-valued data types. - -Bitwise Operators -~~~~~~~~~~~~~~~~~ - -A conforming implementation of the array API standard must provide and support an array object supporting the following Python bitwise operators. - -- `~x`: :meth:`.dataframe.__invert__` - - - `operator.inv(x) `_ - - `operator.invert(x) `_ - - `operator.__inv__(x) `_ - - `operator.__invert__(x) `_ - -- `x1 & x2`: :meth:`.dataframe.__and__` - - - `operator.and(x1, x2) `_ - - `operator.__and__(x1, x2) `_ - -- `x1 | x2`: :meth:`.dataframe.__or__` - - - `operator.or(x1, x2) `_ - - `operator.__or__(x1, x2) `_ - -- `x1 ^ x2`: :meth:`.dataframe.__xor__` - - - `operator.xor(x1, x2) `_ - - `operator.__xor__(x1, x2) `_ - -- `x1 << x2`: :meth:`.dataframe.__lshift__` - - - `operator.lshift(x1, x2) `_ - - `operator.__lshift__(x1, x2) `_ - -- `x1 >> x2`: :meth:`.dataframe.__rshift__` - - - `operator.rshift(x1, x2) `_ - - `operator.__rshift__(x1, x2) `_ +Arithmetic operators should be defined for a dataframe having real-valued data types. -Bitwise operators should be defined for arrays having integer and boolean data types. Comparison Operators ~~~~~~~~~~~~~~~~~~~~ @@ -125,32 +77,32 @@ A conforming implementation of the dataframe API standard must provide and support a dataframe object supporting the following Python comparison operators. 
-- `x1 < x2`: :meth:`.dataframe.__lt__` +- `x1 < x2`: :meth:`.DataFrame.__lt__` - `operator.lt(x1, x2) `_ - `operator.__lt__(x1, x2) `_ -- `x1 <= x2`: :meth:`.dataframe.__le__` +- `x1 <= x2`: :meth:`.DataFrame.__le__` - `operator.le(x1, x2) `_ - `operator.__le__(x1, x2) `_ -- `x1 > x2`: :meth:`.dataframe.__gt__` +- `x1 > x2`: :meth:`.DataFrame.__gt__` - `operator.gt(x1, x2) `_ - `operator.__gt__(x1, x2) `_ -- `x1 >= x2`: :meth:`.dataframe.__ge__` +- `x1 >= x2`: :meth:`.DataFrame.__ge__` - `operator.ge(x1, x2) `_ - `operator.__ge__(x1, x2) `_ -- `x1 == x2`: :meth:`.dataframe.__eq__` +- `x1 == x2`: :meth:`.DataFrame.__eq__` - `operator.eq(x1, x2) `_ - `operator.__eq__(x1, x2) `_ -- `x1 != x2`: :meth:`.dataframe.__ne__` +- `x1 != x2`: :meth:`.DataFrame.__ne__` - `operator.ne(x1, x2) `_ - `operator.__ne__(x1, x2) `_ @@ -192,7 +144,7 @@ Attributes :toctree: generated :template: property.rst - dataframe.shape + DataFrame.shape ------------------------------------------------- @@ -206,26 +158,26 @@ Methods :toctree: generated :template: property.rst - dataframe.__abs__ - dataframe.__add__ - dataframe.__dataframe_namespace__ - dataframe.__complex__ - dataframe.__eq__ - dataframe.__float__ - dataframe.__floordiv__ - dataframe.__ge__ - dataframe.__getitem__ - dataframe.__gt__ - dataframe.__int__ - dataframe.__le__ - dataframe.__lt__ - dataframe.__mod__ - dataframe.__mul__ - dataframe.__ne__ - dataframe.__neg__ - dataframe.__or__ - dataframe.__pos__ - dataframe.__pow__ - dataframe.__setitem__ - dataframe.__sub__ - dataframe.__truediv__ + DataFrame.__abs__ + DataFrame.__add__ + DataFrame.__dataframe_namespace__ + DataFrame.__complex__ + DataFrame.__eq__ + DataFrame.__float__ + DataFrame.__floordiv__ + DataFrame.__ge__ + DataFrame.__getitem__ + DataFrame.__gt__ + DataFrame.__int__ + DataFrame.__le__ + DataFrame.__lt__ + DataFrame.__mod__ + DataFrame.__mul__ + DataFrame.__ne__ + DataFrame.__neg__ + DataFrame.__or__ + DataFrame.__pos__ + DataFrame.__pow__ + DataFrame.__setitem__ 
+ DataFrame.__sub__ + DataFrame.__truediv__ From c665192d121386b17a8a9208d51ce78eb41338f5 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 08:00:56 +0000 Subject: [PATCH 05/12] Add a `_types.py` file for future use --- .../API_specification/dataframe_api/_types.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 spec/API_specification/dataframe_api/_types.py diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py new file mode 100644 index 00000000..0e93c45e --- /dev/null +++ b/spec/API_specification/dataframe_api/_types.py @@ -0,0 +1,63 @@ +""" +Types for type annotations used in the dataframe API standard. + +The type variables should be replaced with the actual types for a given +library, e.g., for Pandas TypeVar('DataFrame') would be replaced with pd.DataFrame. +""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import ( + Any, + List, + Literal, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + Protocol, +) +from enum import Enum + +array = TypeVar("array") +DataFrame = TypeVar("DataFrame") +device = TypeVar("device") +dtype = TypeVar("dtype") +SupportsDLPack = TypeVar("SupportsDLPack") +SupportsBufferProtocol = TypeVar("SupportsBufferProtocol") +PyCapsule = TypeVar("PyCapsule") +# ellipsis cannot actually be imported from anywhere, so include a dummy here +# to keep pyflakes happy. https://github.com/python/typeshed/issues/3556 +ellipsis = TypeVar("ellipsis") + +_T_co = TypeVar("_T_co", covariant=True) + + +class NestedSequence(Protocol[_T_co]): + def __getitem__(self, key: int, /) -> Union[_T_co, NestedSequence[_T_co]]: + ... + + def __len__(self, /) -> int: + ... 
+ + +__all__ = [ + "Any", + "DataFrame", + "List", + "Literal", + "NestedSequence", + "Optional", + "PyCapsule", + "SupportsBufferProtocol", + "SupportsDLPack", + "Tuple", + "Union", + "Sequence", + "array", + "device", + "dtype", + "ellipsis", + "Enum", +] From c50bc0fdf2a059c39cf3894dbdc0cd65eb63f7b1 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 08:01:19 +0000 Subject: [PATCH 06/12] WIP: undo some type annotation changes that don't seem needed --- .../dataframe_api/dataframe_object.py | 75 ++++++++++--------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index dccc08f5..a6cbb635 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -1,11 +1,16 @@ -__all__ = ["DataFrame"] - +from __future__ import annotations from typing import Sequence, Union, TYPE_CHECKING from .column_object import Column from .groupby_object import GroupBy +__all__ = ["DataFrame"] + +class Scalar: + "A class to represent Python scalars" + + class DataFrame: def groupby(self, keys: list[str], /) -> GroupBy: """ @@ -32,7 +37,7 @@ def get_column_by_name(self, name: str, /) -> Column: """ ... - def get_columns_by_name(self, names: Sequence[str], /) -> "DataFrame": + def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame: """ Select multiple columns by name. @@ -51,7 +56,7 @@ def get_columns_by_name(self, names: Sequence[str], /) -> "DataFrame": """ ... - def get_rows(self, indices: Sequence[int]) -> "DataFrame": + def get_rows(self, indices: Sequence[int]) -> DataFrame: """ Select a subset of rows, similar to `ndarray.take`. 
@@ -74,7 +79,7 @@ def get_rows(self, indices: Sequence[int]) -> "DataFrame": def slice_rows( self, start: int | None, stop: int | None, step: int | None - ) -> "DataFrame": + ) -> DataFrame: """ Select a subset of rows corresponding to a slice. @@ -90,7 +95,7 @@ def slice_rows( """ ... - def get_rows_by_mask(self, mask: "Column[bool]") -> "DataFrame": + def get_rows_by_mask(self, mask: "Column[bool]") -> DataFrame: """ Select a subset of rows corresponding to a mask. @@ -109,7 +114,7 @@ def get_rows_by_mask(self, mask: "Column[bool]") -> "DataFrame": """ ... - def insert(self, loc: int, label: str, value: Column) -> "DataFrame": + def insert(self, loc: int, label: str, value: Column) -> DataFrame: """ Insert column into DataFrame at specified location. @@ -123,7 +128,7 @@ def insert(self, loc: int, label: str, value: Column) -> "DataFrame": """ ... - def drop_column(self, label: str) -> "DataFrame": + def drop_column(self, label: str) -> DataFrame: """ Drop the specified column. @@ -142,7 +147,7 @@ def drop_column(self, label: str) -> "DataFrame": """ ... - def set_column(self, label: str, value: Column) -> "DataFrame": + def set_column(self, label: str, value: Column) -> DataFrame: """ Add or replace a column. @@ -157,7 +162,7 @@ def set_column(self, label: str, value: Column) -> "DataFrame": """ ... - def __eq__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __eq__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -172,7 +177,7 @@ def __eq__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __ne__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __ne__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -187,7 +192,7 @@ def __ne__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... 
- def __ge__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __ge__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -202,7 +207,7 @@ def __ge__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __gt__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __gt__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -217,7 +222,7 @@ def __gt__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __le__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __le__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -232,7 +237,7 @@ def __le__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __lt__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __lt__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -247,7 +252,7 @@ def __lt__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __add__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __add__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -262,7 +267,7 @@ def __add__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __sub__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __sub__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -277,7 +282,7 @@ def __sub__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __mul__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __mul__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -292,7 +297,7 @@ def __mul__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... 
- def __truediv__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __truediv__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -307,7 +312,7 @@ def __truediv__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __floordiv__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __floordiv__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -322,7 +327,7 @@ def __floordiv__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __pow__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __pow__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -337,7 +342,7 @@ def __pow__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __mod__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": + def __mod__(self, other: DataFrame | "Scalar") -> DataFrame: """ Parameters ---------- @@ -352,7 +357,7 @@ def __mod__(self, other: Union["DataFrame", "Scalar"]) -> "DataFrame": """ ... - def __divmod__(self, other: Union["DataFrame", "Scalar"]) -> tuple["DataFrame", "DataFrame"]: + def __divmod__(self, other: DataFrame | "Scalar") -> tuple[DataFrame, DataFrame]: """ Parameters ---------- @@ -368,67 +373,67 @@ def __divmod__(self, other: Union["DataFrame", "Scalar"]) -> tuple["DataFrame", """ ... - def any(self, skipna: bool = True) -> "DataFrame": + def any(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def all(self, skipna: bool = True) -> "DataFrame": + def all(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def min(self, skipna: bool = True) -> "DataFrame": + def min(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def max(self, skipna: bool = True) -> "DataFrame": + def max(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... 
- def sum(self, skipna: bool = True) -> "DataFrame": + def sum(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def prod(self, skipna: bool = True) -> "DataFrame": + def prod(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def median(self, skipna: bool = True) -> "DataFrame": + def median(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def mean(self, skipna: bool = True) -> "DataFrame": + def mean(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def std(self, skipna: bool = True) -> "DataFrame": + def std(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def var(self, skipna: bool = True) -> "DataFrame": + def var(self, skipna: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def isnull(self) -> "DataFrame": + def isnull(self) -> DataFrame: """ Check for 'missing' or 'null' entries. @@ -446,7 +451,7 @@ def isnull(self) -> "DataFrame": """ ... - def isnan(self) -> "DataFrame": + def isnan(self) -> DataFrame: """ Check for nan-like entries. 
From 71a10c4e60e4642b978ed318c18e2476011f2f0c Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 08:02:34 +0000 Subject: [PATCH 07/12] Update .gitignore for generated files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 0e28134f..a37fe28a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.swp _build +__pycache__ +spec/API_specification/generated/ From 3fe6b68f9107dac46f73f6e8ffaa0c8ad3405162 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 08:17:42 +0000 Subject: [PATCH 08/12] More changes, some autodoc warnings gone now --- .../dataframe_api/dataframe_object.py | 1 + spec/API_specification/dataframe_object.rst | 30 +++++-------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index a6cbb635..acae4588 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -7,6 +7,7 @@ __all__ = ["DataFrame"] + class Scalar: "A class to represent Python scalars" diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst index c8f9a3d7..8634a2ba 100644 --- a/spec/API_specification/dataframe_object.rst +++ b/spec/API_specification/dataframe_object.rst @@ -1,4 +1,4 @@ -.. _array-object: +.. _dataframe-object: Dataframe object ================ @@ -22,16 +22,6 @@ Arithmetic Operators A conforming implementation of the array API standard must provide and support an array object supporting the following Python arithmetic operators. 
-- ``+x``: :meth:`.DataFrame.__pos__` - - - `operator.pos(x) `_ - - `operator.__pos__(x) `_ - -- `-x`: :meth:`.DataFrame.__neg__` - - - `operator.neg(x) `_ - - `operator.__neg__(x) `_ - - `x1 + x2`: :meth:`.DataFrame.__add__` - `operator.add(x1, x2) `_ @@ -136,11 +126,15 @@ Arithmetic Operators Attributes ---------- + +TODO + .. NOTE: please keep the attributes in alphabetical order -.. autosummary:: +.. + autosummary:: :toctree: generated :template: property.rst @@ -158,26 +152,16 @@ Methods :toctree: generated :template: property.rst - DataFrame.__abs__ DataFrame.__add__ - DataFrame.__dataframe_namespace__ - DataFrame.__complex__ DataFrame.__eq__ - DataFrame.__float__ DataFrame.__floordiv__ DataFrame.__ge__ - DataFrame.__getitem__ DataFrame.__gt__ - DataFrame.__int__ DataFrame.__le__ DataFrame.__lt__ + DataFrame.__ne__ DataFrame.__mod__ DataFrame.__mul__ - DataFrame.__ne__ - DataFrame.__neg__ - DataFrame.__or__ - DataFrame.__pos__ DataFrame.__pow__ - DataFrame.__setitem__ DataFrame.__sub__ DataFrame.__truediv__ From 51a6f0de33ab153bb87b5ae0e7ebf121d0abdc51 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 08:30:41 +0000 Subject: [PATCH 09/12] Extend docstrings of dunder methods, so autodoc table looks reasonable. --- .../dataframe_api/dataframe_object.py | 31 +++++++++++++++++-- spec/API_specification/dataframe_object.rst | 5 +++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index acae4588..7997e2c3 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -165,6 +165,8 @@ def set_column(self, label: str, value: Column) -> DataFrame: def __eq__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Compare for equality. 
+ Parameters ---------- other : DataFrame or Scalar @@ -180,6 +182,8 @@ def __eq__(self, other: DataFrame | "Scalar") -> DataFrame: def __ne__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Compare for non-equality. + Parameters ---------- other : DataFrame or Scalar @@ -195,6 +199,8 @@ def __ne__(self, other: DataFrame | "Scalar") -> DataFrame: def __ge__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Compare for "greater than or equal to" `other`. + Parameters ---------- other : DataFrame or Scalar @@ -210,6 +216,8 @@ def __ge__(self, other: DataFrame | "Scalar") -> DataFrame: def __gt__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Compare for "greater than" `other`. + Parameters ---------- other : DataFrame or Scalar @@ -225,6 +233,8 @@ def __gt__(self, other: DataFrame | "Scalar") -> DataFrame: def __le__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Compare for "less than or equal to" `other`. + Parameters ---------- other : DataFrame or Scalar @@ -240,6 +250,8 @@ def __le__(self, other: DataFrame | "Scalar") -> DataFrame: def __lt__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Compare for "less than" `other`. + Parameters ---------- other : DataFrame or Scalar @@ -255,6 +267,8 @@ def __lt__(self, other: DataFrame | "Scalar") -> DataFrame: def __add__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Add `other` dataframe or scalar to this dataframe. + Parameters ---------- other : DataFrame or Scalar @@ -270,6 +284,8 @@ def __add__(self, other: DataFrame | "Scalar") -> DataFrame: def __sub__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Subtract `other` dataframe or scalar from this dataframe. + Parameters ---------- other : DataFrame or Scalar @@ -285,6 +301,8 @@ def __sub__(self, other: DataFrame | "Scalar") -> DataFrame: def __mul__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Multiply `other` dataframe or scalar with this dataframe. 
+ Parameters ---------- other : DataFrame or Scalar @@ -300,6 +318,8 @@ def __mul__(self, other: DataFrame | "Scalar") -> DataFrame: def __truediv__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Divide this dataframe by `other` dataframe or scalar. True division, returns floats. + Parameters ---------- other : DataFrame or Scalar @@ -315,6 +335,8 @@ def __truediv__(self, other: DataFrame | "Scalar") -> DataFrame: def __floordiv__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. + Parameters ---------- other : DataFrame or Scalar @@ -330,6 +352,8 @@ def __floordiv__(self, other: DataFrame | "Scalar") -> DataFrame: def __pow__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Raise this dataframe to the power of `other`. + Parameters ---------- other : DataFrame or Scalar @@ -345,6 +369,8 @@ def __pow__(self, other: DataFrame | "Scalar") -> DataFrame: def __mod__(self, other: DataFrame | "Scalar") -> DataFrame: """ + Return modulus of this dataframe by `other` (`%` operator). + Parameters ---------- other : DataFrame or Scalar @@ -360,6 +386,8 @@ def __mod__(self, other: DataFrame | "Scalar") -> DataFrame: def __divmod__(self, other: DataFrame | "Scalar") -> tuple[DataFrame, DataFrame]: """ + Return quotient and remainder of integer division. See `divmod` builtin function. + Parameters ---------- other : DataFrame or Scalar @@ -369,8 +397,7 @@ def __divmod__(self, other: DataFrame | "Scalar") -> tuple[DataFrame, DataFrame] Returns ------- - DataFrame - DataFrame + A tuple of two DataFrames """ ... diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst index 8634a2ba..e7f502f4 100644 --- a/spec/API_specification/dataframe_object.rst +++ b/spec/API_specification/dataframe_object.rst @@ -59,6 +59,11 @@ an array object supporting the following Python arithmetic operators. 
Arithmetic operators should be defined for a dataframe having real-valued data types. +.. note:: + + TODO: figure out whether we want to add ``__neg__`` and ``__pos__``; those + are the two missing arithmetic operators. + Comparison Operators ~~~~~~~~~~~~~~~~~~~~ From 723426596cf8cce4cdc26a988ba4268639ee482e Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 08:42:28 +0000 Subject: [PATCH 10/12] More fixes - Sphinx build is clean now --- spec/API_specification/dataframe_api/_types.py | 2 +- spec/API_specification/dataframe_api/dataframe_object.py | 6 ++---- spec/conf.py | 7 ++++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 0e93c45e..0987ecaa 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -21,7 +21,7 @@ from enum import Enum array = TypeVar("array") -DataFrame = TypeVar("DataFrame") +Scalar = TypeVar("Scalar") device = TypeVar("device") dtype = TypeVar("dtype") SupportsDLPack = TypeVar("SupportsDLPack") diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 7997e2c3..9c12665c 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -4,12 +4,10 @@ from .column_object import Column from .groupby_object import GroupBy - -__all__ = ["DataFrame"] +from ._types import Scalar -class Scalar: - "A class to represent Python scalars" +__all__ = ["DataFrame"] class DataFrame: diff --git a/spec/conf.py b/spec/conf.py index a45bb3b3..dfce68a6 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -65,7 +65,7 @@ # them don't actually refer to anything that we have a document for. 
nitpick_ignore = [ ('py:class', 'array'), - ('py:class', 'dataframe'), + ('py:class', 'DataFrame'), ('py:class', 'device'), ('py:class', 'dtype'), ('py:class', 'NestedSequence'), @@ -73,11 +73,12 @@ ('py:class', 'PyCapsule'), ('py:class', 'enum.Enum'), ('py:class', 'ellipsis'), + ('py:class', 'Scalar'), ] # NOTE: this alias handling isn't used yet - added in anticipation of future -# need based on array API aliases. +# need based on dataframe API aliases. # In dataframe_object.py we have to use aliased names for some types because they -# would otherwise refer back to method objects of array +# would otherwise refer back to method objects of `dataframe` autodoc_type_aliases = { 'dataframe': 'dataframe', 'Device': 'device', From 89d8ee5fbcffc53d3f4366ae9e0335c79ec602f4 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 17 Jan 2023 08:48:56 +0000 Subject: [PATCH 11/12] Add groupby and column APIs to html docs, and add rst templates The Sphinx templates help with better styling of methods/attrs --- spec/API_specification/column_object.rst | 23 +++++++++++++++++ spec/API_specification/groupby_object.rst | 31 +++++++++++++++++++++++ spec/API_specification/index.rst | 4 ++- spec/_templates/attribute.rst | 5 ++++ spec/_templates/method.rst | 5 ++++ spec/_templates/property.rst | 5 ++++ 6 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 spec/API_specification/column_object.rst create mode 100644 spec/API_specification/groupby_object.rst create mode 100644 spec/_templates/attribute.rst create mode 100644 spec/_templates/method.rst create mode 100644 spec/_templates/property.rst diff --git a/spec/API_specification/column_object.rst b/spec/API_specification/column_object.rst new file mode 100644 index 00000000..06206d12 --- /dev/null +++ b/spec/API_specification/column_object.rst @@ -0,0 +1,23 @@ +.. 
_column-object: + +Column object +============= + +A conforming implementation of the dataframe API standard must provide and +support a column object having the following attributes and methods. + +------------------------------------------------- + +Methods +------- +TODO + +.. + NOTE: please keep the methods in alphabetical order + + .. currentmodule:: dataframe_api + + .. autosummary:: + :toctree: generated + :template: property.rst + diff --git a/spec/API_specification/groupby_object.rst b/spec/API_specification/groupby_object.rst new file mode 100644 index 00000000..60b9b2bb --- /dev/null +++ b/spec/API_specification/groupby_object.rst @@ -0,0 +1,31 @@ +.. _groupby-object: + +Groupby object +============== + +A conforming implementation of the dataframe API standard must provide and +support a groupby object having the following attributes and methods. + +------------------------------------------------- + +Methods +------- +.. + NOTE: please keep the methods in alphabetical order + +.. currentmodule:: dataframe_api + +.. autosummary:: + :toctree: generated + :template: property.rst + + GroupBy.all + GroupBy.any + GroupBy.max + GroupBy.min + GroupBy.mean + GroupBy.median + GroupBy.prod + GroupBy.std + GroupBy.sum + GroupBy.var diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index eebf648e..de195405 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -5,6 +5,8 @@ API specification .. toctree:: :caption: API specification - :maxdepth: 1 + :maxdepth: 3 dataframe_object + column_object + groupby_object diff --git a/spec/_templates/attribute.rst b/spec/_templates/attribute.rst new file mode 100644 index 00000000..30d21295 --- /dev/null +++ b/spec/_templates/attribute.rst @@ -0,0 +1,5 @@ +.. currentmodule:: {{ module }} + +{{ name.split('.')[-1] | underline }} + +.. 
autodata:: {{ name }} diff --git a/spec/_templates/method.rst b/spec/_templates/method.rst new file mode 100644 index 00000000..3a85f287 --- /dev/null +++ b/spec/_templates/method.rst @@ -0,0 +1,5 @@ +.. currentmodule:: {{ module }} + +{{ name.split('.')[-1] | underline }} + +.. autofunction:: {{ name }} diff --git a/spec/_templates/property.rst b/spec/_templates/property.rst new file mode 100644 index 00000000..baf31cea --- /dev/null +++ b/spec/_templates/property.rst @@ -0,0 +1,5 @@ +.. currentmodule:: {{ module }} + +{{ name.split('.')[-1] | underline }} + +.. auto{{ objtype }}:: {{ objname }} \ No newline at end of file From 5f143904d701ff5745cf407f78064afcc6c78365 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 18 Jan 2023 13:17:27 +0000 Subject: [PATCH 12/12] Address review comments --- .../dataframe_api/dataframe_object.py | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 9c12665c..9b3e0857 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -1,10 +1,10 @@ from __future__ import annotations from typing import Sequence, Union, TYPE_CHECKING -from .column_object import Column -from .groupby_object import GroupBy - -from ._types import Scalar +if TYPE_CHECKING: + from .column_object import Column + from .groupby_object import GroupBy + from ._types import Scalar __all__ = ["DataFrame"] @@ -161,7 +161,7 @@ def set_column(self, label: str, value: Column) -> DataFrame: """ ... - def __eq__(self, other: DataFrame | "Scalar") -> DataFrame: + def __eq__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for equality. @@ -178,7 +178,7 @@ def __eq__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... 
- def __ne__(self, other: DataFrame | "Scalar") -> DataFrame: + def __ne__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for non-equality. @@ -195,7 +195,7 @@ def __ne__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __ge__(self, other: DataFrame | "Scalar") -> DataFrame: + def __ge__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "greater than or equal to" `other`. @@ -212,7 +212,7 @@ def __ge__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __gt__(self, other: DataFrame | "Scalar") -> DataFrame: + def __gt__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "greater than" `other`. @@ -229,7 +229,7 @@ def __gt__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __le__(self, other: DataFrame | "Scalar") -> DataFrame: + def __le__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "less than or equal to" `other`. @@ -246,7 +246,7 @@ def __le__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __lt__(self, other: DataFrame | "Scalar") -> DataFrame: + def __lt__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "less than" `other`. @@ -263,7 +263,7 @@ def __lt__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __add__(self, other: DataFrame | "Scalar") -> DataFrame: + def __add__(self, other: DataFrame | Scalar) -> DataFrame: """ Add `other` dataframe or scalar to this dataframe. @@ -280,7 +280,7 @@ def __add__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __sub__(self, other: DataFrame | "Scalar") -> DataFrame: + def __sub__(self, other: DataFrame | Scalar) -> DataFrame: """ Subtract `other` dataframe or scalar from this dataframe. @@ -297,7 +297,7 @@ def __sub__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __mul__(self, other: DataFrame | "Scalar") -> DataFrame: + def __mul__(self, other: DataFrame | Scalar) -> DataFrame: """ Multiply `other` dataframe or scalar with this dataframe. 
@@ -314,7 +314,7 @@ def __mul__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __truediv__(self, other: DataFrame | "Scalar") -> DataFrame: + def __truediv__(self, other: DataFrame | Scalar) -> DataFrame: """ Divide this dataframe by `other` dataframe or scalar. True division, returns floats. @@ -331,7 +331,7 @@ def __truediv__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __floordiv__(self, other: DataFrame | "Scalar") -> DataFrame: + def __floordiv__(self, other: DataFrame | Scalar) -> DataFrame: """ Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. @@ -348,7 +348,7 @@ def __floordiv__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __pow__(self, other: DataFrame | "Scalar") -> DataFrame: + def __pow__(self, other: DataFrame | Scalar) -> DataFrame: """ Raise this dataframe to the power of `other`. @@ -365,7 +365,7 @@ def __pow__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __mod__(self, other: DataFrame | "Scalar") -> DataFrame: + def __mod__(self, other: DataFrame | Scalar) -> DataFrame: """ Return modulus of this dataframe by `other` (`%` operator). @@ -382,7 +382,7 @@ def __mod__(self, other: DataFrame | "Scalar") -> DataFrame: """ ... - def __divmod__(self, other: DataFrame | "Scalar") -> tuple[DataFrame, DataFrame]: + def __divmod__(self, other: DataFrame | Scalar) -> tuple[DataFrame, DataFrame]: """ Return quotient and remainder of integer division. See `divmod` builtin function.