From 26ca8b23b0195f230fb4f8cf5de23afeb266ab67 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 26 Apr 2023 15:08:01 +0100 Subject: [PATCH 1/7] Update front page and sections --- spec/api_design_methodology.md | 3 +++ spec/benchmark_suite.md | 1 - spec/conf.py | 4 ++-- spec/index.rst | 13 ++++++++++--- spec/usage_data.md | 17 ----------------- 5 files changed, 15 insertions(+), 23 deletions(-) create mode 100644 spec/api_design_methodology.md delete mode 100644 spec/benchmark_suite.md delete mode 100644 spec/usage_data.md diff --git a/spec/api_design_methodology.md b/spec/api_design_methodology.md new file mode 100644 index 00000000..f4ecfcc3 --- /dev/null +++ b/spec/api_design_methodology.md @@ -0,0 +1,3 @@ +# Methodology for API design + +TODO: describe approach used to construct the API. diff --git a/spec/benchmark_suite.md b/spec/benchmark_suite.md deleted file mode 100644 index 77000885..00000000 --- a/spec/benchmark_suite.md +++ /dev/null @@ -1 +0,0 @@ -# Benchmark suite diff --git a/spec/conf.py b/spec/conf.py index dfce68a6..229a4d5c 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -18,11 +18,11 @@ # -- Project information ----------------------------------------------------- project = 'Python dataframe API standard' -copyright = '2022, Consortium for Python Data API Standards' +copyright = '2021-, Consortium for Python Data API Standards' author = 'Consortium for Python Data API Standards' # The full version, including alpha/beta/rc tags -release = '2022.12-DRAFT' +release = '2023.04-DRAFT' # -- General configuration --------------------------------------------------- diff --git a/spec/index.rst b/spec/index.rst index c14ab8d0..4abaf7e1 100644 --- a/spec/index.rst +++ b/spec/index.rst @@ -1,6 +1,14 @@ Python dataframe API standard ============================= +.. 
note:: + + This API standard is still a work in progress, and approaching "minimum + viable product" status, where it's becoming possible to write library code + against it that is dataframe-agnostic. Design discussions are + happening in `this repository <https://github.com/data-apis/dataframe-api/>`__. + Participation there is very much welcome. + Contents -------- @@ -21,9 +29,8 @@ Contents API_specification/index .. toctree:: - :caption: Methodology and Usage + :caption: Methodology and Tooling :maxdepth: 1 - usage_data + api_design_methodology verification_test_suite - benchmark_suite diff --git a/spec/usage_data.md b/spec/usage_data.md deleted file mode 100644 index 9280e5c1..00000000 --- a/spec/usage_data.md +++ /dev/null @@ -1,17 +0,0 @@ -# Data on existing API design & usage - -## Summary - - - - -## Methods - - - - -## Tooling - - - -## Detailed results and raw data From 48cb02437441748ba7faf1341f94d47253fb872a Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 26 Apr 2023 22:06:33 +0100 Subject: [PATCH 2/7] Use autodoc better to generate dataframe and groupby docs --- .../dataframe_api/dataframe_object.py | 10 +++++++++ .../dataframe_api/groupby_object.py | 12 +++++++++++ spec/API_specification/dataframe_object.rst | 21 +------------------ spec/API_specification/groupby_object.rst | 21 +++---------------- spec/conf.py | 8 +++++++ 5 files changed, 34 insertions(+), 38 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 5128101f..ea46cd52 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -11,6 +11,16 @@ class DataFrame: + """ + A DataFrame. + + Notes + ----- + Note that this DataFrame class is not meant to be instantiated directly by + users of the library implementing the dataframe API standard. 
Rather, use + constructor functions or an already-created dataframe object retrieved via + + """ @property def dataframe(self) -> object: """ diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index a8eced30..9f6ee813 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -4,7 +4,19 @@ from .dataframe_object import DataFrame +__all__ = ['GroupBy'] + + class GroupBy: + """ + GroupBy class. + + Note that this class is not meant to be constructed by users. + It is returned from `DataFrame.groupby`. + + **Methods** + + """ def any(self, skip_nulls: bool = True) -> "DataFrame": ... diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst index e7f502f4..86c13c6c 100644 --- a/spec/API_specification/dataframe_object.rst +++ b/spec/API_specification/dataframe_object.rst @@ -149,24 +149,5 @@ TODO Methods ------- -.. - NOTE: please keep the methods in alphabetical order - - -.. autosummary:: - :toctree: generated - :template: property.rst - DataFrame.__add__ - DataFrame.__eq__ - DataFrame.__floordiv__ - DataFrame.__ge__ - DataFrame.__gt__ - DataFrame.__le__ - DataFrame.__lt__ - DataFrame.__ne__ - DataFrame.__mod__ - DataFrame.__mul__ - DataFrame.__pow__ - DataFrame.__sub__ - DataFrame.__truediv__ +.. autoclass:: DataFrame diff --git a/spec/API_specification/groupby_object.rst b/spec/API_specification/groupby_object.rst index 60b9b2bb..324d4dbc 100644 --- a/spec/API_specification/groupby_object.rst +++ b/spec/API_specification/groupby_object.rst @@ -6,26 +6,11 @@ Groupby object A conforming implementation of the dataframe API standard must provide and support a groupby object having the following attributes and methods. -------------------------------------------------- - -Methods -------- -.. - NOTE: please keep the methods in alphabetical order - .. 
currentmodule:: dataframe_api .. autosummary:: :toctree: generated - :template: property.rst + :template: class.rst + + GroupBy - GroupBy.all - GroupBy.any - GroupBy.max - GroupBy.min - GroupBy.mean - GroupBy.median - GroupBy.prod - GroupBy.std - GroupBy.sum - GroupBy.var diff --git a/spec/conf.py b/spec/conf.py index 229a4d5c..7b98d86b 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -46,7 +46,15 @@ autosummary_generate = True autodoc_typehints = 'signature' +autodoc_default_options = { +# 'attributes': True, + 'members': True, + 'special-members': True, + 'undoc-members': True, + 'exclude-members': '__annotations__, __dict__', +} add_module_names = False +napoleon_numpy_docstring = True napoleon_custom_sections = [('Returns', 'params_style')] default_role = 'code' From 8a577c9cfb32ef02c9cb33e4ff8c0449f0798369 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 26 Apr 2023 22:17:12 +0100 Subject: [PATCH 3/7] More autodoc improvements, and cleaning up operator support boilerplate --- .../dataframe_api/groupby_object.py | 2 +- spec/API_specification/dataframe_object.rst | 94 +------------------ spec/API_specification/groupby_object.rst | 8 +- spec/conf.py | 2 +- 4 files changed, 5 insertions(+), 101 deletions(-) diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index 9f6ee813..ee74ed95 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -9,7 +9,7 @@ class GroupBy: """ - GroupBy class. + GroupBy object. Note that this class is not meant to be constructed by users. It is returned from `DataFrame.groupby`. 
diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst index 86c13c6c..718d6f31 100644 --- a/spec/API_specification/dataframe_object.rst +++ b/spec/API_specification/dataframe_object.rst @@ -6,10 +6,6 @@ Dataframe object A conforming implementation of the dataframe API standard must provide and support a dataframe object having the following attributes and methods. -------------------------------------------------- - -.. _operators: - Operators --------- @@ -20,42 +16,7 @@ Arithmetic Operators ~~~~~~~~~~~~~~~~~~~~ A conforming implementation of the array API standard must provide and support -an array object supporting the following Python arithmetic operators. - -- `x1 + x2`: :meth:`.DataFrame.__add__` - - - `operator.add(x1, x2) `_ - - `operator.__add__(x1, x2) `_ - -- `x1 - x2`: :meth:`.DataFrame.__sub__` - - - `operator.sub(x1, x2) `_ - - `operator.__sub__(x1, x2) `_ - -- `x1 * x2`: :meth:`.DataFrame.__mul__` - - - `operator.mul(x1, x2) `_ - - `operator.__mul__(x1, x2) `_ - -- `x1 / x2`: :meth:`.DataFrame.__truediv__` - - - `operator.truediv(x1,x2) `_ - - `operator.__truediv__(x1, x2) `_ - -- `x1 // x2`: :meth:`.DataFrame.__floordiv__` - - - `operator.floordiv(x1, x2) `_ - - `operator.__floordiv__(x1, x2) `_ - -- `x1 % x2`: :meth:`.DataFrame.__mod__` - - - `operator.mod(x1, x2) `_ - - `operator.__mod__(x1, x2) `_ - -- `x1 ** x2`: :meth:`.DataFrame.__pow__` - - - `operator.pow(x1, x2) `_ - - `operator.__pow__(x1, x2) `_ +a dataframe object supporting the following Python arithmetic operators. Arithmetic operators should be defined for a dataframe having real-valued data types. @@ -72,36 +33,6 @@ A conforming implementation of the dataframe API standard must provide and support a dataframe object supporting the following Python comparison operators. 
-- `x1 < x2`: :meth:`.DataFrame.__lt__` - - - `operator.lt(x1, x2) `_ - - `operator.__lt__(x1, x2) `_ - -- `x1 <= x2`: :meth:`.DataFrame.__le__` - - - `operator.le(x1, x2) `_ - - `operator.__le__(x1, x2) `_ - -- `x1 > x2`: :meth:`.DataFrame.__gt__` - - - `operator.gt(x1, x2) `_ - - `operator.__gt__(x1, x2) `_ - -- `x1 >= x2`: :meth:`.DataFrame.__ge__` - - - `operator.ge(x1, x2) `_ - - `operator.__ge__(x1, x2) `_ - -- `x1 == x2`: :meth:`.DataFrame.__eq__` - - - `operator.eq(x1, x2) `_ - - `operator.__eq__(x1, x2) `_ - -- `x1 != x2`: :meth:`.DataFrame.__ne__` - - - `operator.ne(x1, x2) `_ - - `operator.__ne__(x1, x2) `_ - Comparison operators should be defined for dataframes having any data type. In-place Operators @@ -125,29 +56,6 @@ Arithmetic Operators - ``__rpow__`` - ``__rmod__`` -------------------------------------------------- - .. currentmodule:: dataframe_api -Attributes ----------- - -TODO - -.. - NOTE: please keep the attributes in alphabetical order - - -.. - autosummary:: - :toctree: generated - :template: property.rst - - DataFrame.shape - -------------------------------------------------- - -Methods -------- - .. autoclass:: DataFrame diff --git a/spec/API_specification/groupby_object.rst b/spec/API_specification/groupby_object.rst index 324d4dbc..b316263c 100644 --- a/spec/API_specification/groupby_object.rst +++ b/spec/API_specification/groupby_object.rst @@ -4,13 +4,9 @@ Groupby object ============== A conforming implementation of the dataframe API standard must provide and -support a groupby object having the following attributes and methods. +support a groupby object having the following methods. .. currentmodule:: dataframe_api -.. autosummary:: - :toctree: generated - :template: class.rst - - GroupBy +.. 
autoclass:: GroupBy diff --git a/spec/conf.py b/spec/conf.py index 7b98d86b..fc0d1375 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -51,7 +51,7 @@ 'members': True, 'special-members': True, 'undoc-members': True, - 'exclude-members': '__annotations__, __dict__', + 'exclude-members': '__annotations__, __dict__,__weakref__,__module__', } add_module_names = False napoleon_numpy_docstring = True From 6d36cfb47f3d1a49cf43a6b974e9b3ca2d726a27 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 26 Apr 2023 22:46:12 +0100 Subject: [PATCH 4/7] Finish dealing with operators on dataframes --- .../dataframe_api/dataframe_object.py | 19 +++++-- spec/API_specification/dataframe_object.rst | 53 +------------------ spec/API_specification/groupby_object.rst | 3 +- 3 files changed, 19 insertions(+), 56 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index ea46cd52..840f8059 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -12,14 +12,25 @@ class DataFrame: """ - A DataFrame. + DataFrame object - Notes - ----- - Note that this DataFrame class is not meant to be instantiated directly by + Note that this DataFrame object is not meant to be instantiated directly by users of the library implementing the dataframe API standard. Rather, use constructor functions or an already-created dataframe object retrieved via + **Python operator support** + + All arithmetic operators defined by the Python language, except for + ``__matmul__``, ``__neg__`` and ``__pos__``, must be supported for + numerical data types. + + All comparison operators defined by the Python language must be supported + by the dataframe object for all data types for which those comparisons are + supported by the builtin scalar types corresponding to a data type. + + In-place operators must not be supported. 
All operations on the dataframe + object are out-of-place. + """ @property def dataframe(self) -> object: diff --git a/spec/API_specification/dataframe_object.rst b/spec/API_specification/dataframe_object.rst index 718d6f31..784debeb 100644 --- a/spec/API_specification/dataframe_object.rst +++ b/spec/API_specification/dataframe_object.rst @@ -4,57 +4,8 @@ Dataframe object ================ A conforming implementation of the dataframe API standard must provide and -support a dataframe object having the following attributes and methods. - -Operators ---------- - -A conforming implementation of the dataframe API standard must provide and -support a dataframe object supporting the following Python operators. - -Arithmetic Operators -~~~~~~~~~~~~~~~~~~~~ - -A conforming implementation of the array API standard must provide and support -a dataframe object supporting the following Python arithmetic operators. - -Arithmetic operators should be defined for a dataframe having real-valued data types. - -.. note:: - - TODO: figure out whether we want to add ``__neg__`` and ``__pos__``, those - are the two missing arithmetic operators. - - -Comparison Operators -~~~~~~~~~~~~~~~~~~~~ - -A conforming implementation of the dataframe API standard must provide and -support a dataframe object supporting the following Python comparison -operators. - -Comparison operators should be defined for dataframes having any data type. - -In-place Operators -~~~~~~~~~~~~~~~~~~ - -TODO - -Reflected Operators -~~~~~~~~~~~~~~~~~~~ - -TODO - -Arithmetic Operators -"""""""""""""""""""" - -- ``__radd__`` -- ``__rsub__`` -- ``__rmul__`` -- ``__rtruediv__`` -- ``__rfloordiv__`` -- ``__rpow__`` -- ``__rmod__`` +support a dataframe object having the following methods, attributes, and +behavior. .. 
currentmodule:: dataframe_api diff --git a/spec/API_specification/groupby_object.rst b/spec/API_specification/groupby_object.rst index b316263c..b8b47d99 100644 --- a/spec/API_specification/groupby_object.rst +++ b/spec/API_specification/groupby_object.rst @@ -4,9 +4,10 @@ Groupby object ============== A conforming implementation of the dataframe API standard must provide and -support a groupby object having the following methods. +support a groupby object with the following API: .. currentmodule:: dataframe_api .. autoclass:: GroupBy + From 9fde26d8efa045c31143ed3957699bbba1ae8166 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 26 Apr 2023 23:05:11 +0100 Subject: [PATCH 5/7] Fix last warnings, and include Column in the same way as DataFrame --- spec/API_specification/column_object.rst | 19 ++++--------------- .../dataframe_api/column_object.py | 11 +++++++++++ .../dataframe_api/dataframe_object.py | 4 +++- spec/API_specification/index.rst | 1 - 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/spec/API_specification/column_object.rst b/spec/API_specification/column_object.rst index 06206d12..3201b500 100644 --- a/spec/API_specification/column_object.rst +++ b/spec/API_specification/column_object.rst @@ -4,20 +4,9 @@ Column object ============= A conforming implementation of the dataframe API standard must provide and -support a column object having the following attributes and methods. +support a column object having the following methods, attributes, and +behavior. -------------------------------------------------- - -Methods -------- -TODO - -.. - NOTE: please keep the methods in alphabetical order - - .. currentmodule:: dataframe_api - - .. autosummary:: - :toctree: generated - :template: property.rst +.. currentmodule:: dataframe_api +.. 
autoclass:: Column diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 31b610b7..901aaca7 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -1,2 +1,13 @@ +__all__ = ['Column'] + + class Column: + """ + Column object + + Note that this column object is not meant to be instantiated directly by + users of the library implementing the dataframe API standard. Rather, use + constructor functions or an already-created dataframe object. + + """ pass diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 840f8059..6dfd32ab 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -14,7 +14,7 @@ class DataFrame: """ DataFrame object - Note that this DataFrame object is not meant to be instantiated directly by + Note that this dataframe object is not meant to be instantiated directly by users of the library implementing the dataframe API standard. Rather, use constructor functions or an already-created dataframe object retrieved via @@ -31,6 +31,8 @@ class DataFrame: In-place operators must not be supported. All operations on the dataframe object are out-of-place. + **Methods and Attributes** + """ @property def dataframe(self) -> object: diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index de195405..f41e913d 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -4,7 +4,6 @@ API specification .. currentmodule:: dataframe_api .. 
toctree:: - :caption: API specification :maxdepth: 3 dataframe_object From c34267c4c46bbd09dbcbd5f430f884e82ece0d46 Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 01:05:43 +0100 Subject: [PATCH 6/7] Upgrade Sphinx, sphinx-material and docutils --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 23041378..950cda26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -sphinx==4.3.0 -sphinx-material==0.0.30 +sphinx==6.2.1 +sphinx-material==0.0.35 myst-parser sphinx_markdown_tables sphinx_copybutton -docutils<0.18 +docutils==0.19 sphinx-math-dollar From 5d74312fdcda2abb818d4ad924f15e92b31dcd3d Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Thu, 27 Apr 2023 01:07:01 +0100 Subject: [PATCH 7/7] Remove a few sentences with intra-markdown-file links Resolves the issue with CI not being green due to a bug in Myst --- spec/use_cases.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/spec/use_cases.md b/spec/use_cases.md index b0c1e387..02bd012c 100644 --- a/spec/use_cases.md +++ b/spec/use_cases.md @@ -4,12 +4,6 @@ This section discusses the use cases considered for the standard dataframe API. -The goals and scope of this API are defined in the [goals](purpose_and_scope.md#Goals), -and [scope](purpose_and_scope.md#Scope) sections. - -The target audience and stakeholders are presented in the -[stakeholders](purpose_and_scope.md#Stakeholders) section. - ## Types of use cases