From 5273b6469c3c602b4d696fb9faca472d65d4a7f9 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 08:57:49 +0100 Subject: [PATCH 01/38] wip --- .../dataframe_api/__init__.py | 181 ----- .../API_specification/dataframe_api/_types.py | 63 -- .../dataframe_api/column_object.py | 586 -------------- .../dataframe_api/dataframe_object.py | 754 ------------------ .../dataframe_api/groupby_object.py | 51 -- 5 files changed, 1635 deletions(-) delete mode 100644 spec/API_specification/dataframe_api/__init__.py delete mode 100644 spec/API_specification/dataframe_api/_types.py delete mode 100644 spec/API_specification/dataframe_api/column_object.py delete mode 100644 spec/API_specification/dataframe_api/dataframe_object.py delete mode 100644 spec/API_specification/dataframe_api/groupby_object.py diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py deleted file mode 100644 index b8b7014c..00000000 --- a/spec/API_specification/dataframe_api/__init__.py +++ /dev/null @@ -1,181 +0,0 @@ -""" -Function stubs and API documentation for the DataFrame API standard. -""" -from __future__ import annotations - -from typing import Mapping, Sequence, Any - -from .column_object import * -from .dataframe_object import * -from .groupby_object import * - - -__all__ = [ - "__dataframe_api_version", - "column_from_sequence", - "concat", - "dataframe_from_dict", - "is_null", - "null", - "DType", - "Int64", - "Int32", - "Int16", - "Int8", - "UInt64", - "UInt32", - "UInt16", - "UInt8", - "Float64", - "Float32", - "Bool", -] - - -__dataframe_api_version__: str = "YYYY.MM" -""" -String representing the version of the DataFrame API specification to which -the conforming implementation adheres. Set to a concrete value for a stable -implementation of the dataframe API standard. -""" - -def concat(dataframes: Sequence[DataFrame]) -> DataFrame: - """ - Concatenate DataFrames vertically. - - To concatenate horizontally, please use ``insert``. - - Parameters - ---------- - dataframes : Sequence[DataFrame] - DataFrames to concatenate. - Column names, ordering, and dtypes must match. - - Notes - ----- - The order in which the input DataFrames appear in - the output is preserved (so long as the DataFrame implementation supports row - ordering). - """ - ... - -def column_from_sequence(sequence: Sequence[object], *, dtype: DType) -> Column: - """ - Construct Column from sequence of elements. - - Parameters - ---------- - sequence : Sequence[object] - Sequence of elements. Each element must be of the specified - ``dtype``, the corresponding Python builtin scalar type, or - coercible to that Python scalar type. - dtype : DType - Dtype of result. Must be specified. - - Returns - ------- - Column - """ - ... - -def dataframe_from_dict(data: Mapping[str, Column]) -> DataFrame: - """ - Construct DataFrame from map of column names to Columns. - - Parameters - ---------- - data : Mapping[str, Column] - Column must be of the corresponding type of the DataFrame. - For example, it is only supported to build a ``LibraryXDataFrame`` using - ``LibraryXColumn`` instances. - - Returns - ------- - DataFrame - """ - ... - -class null: - """ - A `null` object to represent missing data. - - ``null`` is a scalar, and may be used when constructing a `Column` from a - Python sequence with `column_from_sequence`. It does not support ``is``, - ``==`` or ``bool``. - - Raises - ------ - TypeError - From ``__eq__`` and from ``__bool__``. 
- - For ``__eq__``: a missing value must not be compared for equality - directly. Instead, use `DataFrame.is_null` or `Column.is_null` to check - for presence of missing values. - - For ``__bool__``: truthiness of a missing value is ambiguous. - - Notes - ----- - Like for Python scalars, the ``null`` object may be duck typed so it can - reside on (e.g.) a GPU. Hence, the builtin ``is`` keyword should not be - used to check if an object *is* the ``null`` object. - - """ - ... - -def is_null(value: object, /) -> bool: - """ - Check if an object is a `null` scalar. - - Parameters - ---------- - value : object - Any input type is valid. - - Returns - ------- - bool - True if the input is a `null` object from the same library which - implements the dataframe API standard, False otherwise. - - """ - -########## -# Dtypes # -########## - -class DType: - """Base class for all dtypes.""" - -class Int64(DType): - """Integer type with 64 bits of precision.""" - -class Int32(DType): - """Integer type with 32 bits of precision.""" - -class Int16(DType): - """Integer type with 16 bits of precision.""" - -class Int8(DType): - """Integer type with 8 bits of precision.""" - -class UInt64(DType): - """Unsigned integer type with 64 bits of precision.""" - -class UInt32(DType): - """Unsigned integer type with 32 bits of precision.""" - -class UInt16(DType): - """Unsigned integer type with 16 bits of precision.""" - -class UInt8(DType): - """Unsigned integer type with 8 bits of precision.""" - -class Float64(DType): - """Floating point type with 64 bits of precision.""" - -class Float32(DType): - """Floating point type with 32 bits of precision.""" - -class Bool(DType): - """Boolean type with 8 bits of precision.""" diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py deleted file mode 100644 index 2874ba4c..00000000 --- a/spec/API_specification/dataframe_api/_types.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Types for type annotations used in the dataframe API standard. - -The type variables should be replaced with the actual types for a given -library, e.g., for Pandas TypeVar('DataFrame') would be replaced with pd.DataFrame. -""" -from __future__ import annotations - -from dataclasses import dataclass -from typing import ( - Any, - List, - Literal, - Optional, - Sequence, - Tuple, - TypeVar, - Union, - Protocol, -) -from enum import Enum - -array = TypeVar("array") -Scalar = TypeVar("Scalar") -device = TypeVar("device") -DType = TypeVar("DType") -SupportsDLPack = TypeVar("SupportsDLPack") -SupportsBufferProtocol = TypeVar("SupportsBufferProtocol") -PyCapsule = TypeVar("PyCapsule") -# ellipsis cannot actually be imported from anywhere, so include a dummy here -# to keep pyflakes happy. https://github.com/python/typeshed/issues/3556 -ellipsis = TypeVar("ellipsis") - -_T_co = TypeVar("_T_co", covariant=True) - - -class NestedSequence(Protocol[_T_co]): - def __getitem__(self, key: int, /) -> Union[_T_co, NestedSequence[_T_co]]: - ... - - def __len__(self, /) -> int: - ... 
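The ``null``/``is_null`` semantics above are easiest to see in a short usage sketch. Everything here is illustrative rather than normative: ``df_lib`` stands for any conforming implementation's namespace, and whether ``dtype`` takes a dtype class or an instance is an assumption of this sketch, not something the spec pins down::

    import df_lib  # hypothetical conforming implementation of the standard

    col = df_lib.column_from_sequence([1, df_lib.null, 3], dtype=df_lib.Int64())

    # ``null`` supports neither ``is``, ``==`` nor ``bool``, so missing values
    # are detected with the dedicated helpers instead (Column.is_null and
    # Column.get_value are specified later in this patch).
    assert df_lib.is_null(col.get_value(1))
    mask = col.is_null()  # boolean Column: [False, True, False]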
- - -__all__ = [ - "Any", - "DataFrame", - "List", - "Literal", - "NestedSequence", - "Optional", - "PyCapsule", - "SupportsBufferProtocol", - "SupportsDLPack", - "Tuple", - "Union", - "Sequence", - "array", - "device", - "DType", - "ellipsis", - "Enum", -] diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py deleted file mode 100644 index ffc4765e..00000000 --- a/spec/API_specification/dataframe_api/column_object.py +++ /dev/null @@ -1,586 +0,0 @@ -from __future__ import annotations - -from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal - -if TYPE_CHECKING: - from ._types import Scalar - from . import DType - - -__all__ = ['Column'] - - -class Column: - """ - Column object - - Note that this column object is not meant to be instantiated directly by - users of the library implementing the dataframe API standard. Rather, use - constructor functions or an already-created dataframe object retrieved via - - """ - - def __column_namespace__( - self: Column, /, *, api_version: str | None = None - ) -> Any: - """ - Returns an object that has all the Dataframe Standard API functions on it. - - Parameters - ---------- - api_version: Optional[str] - String representing the version of the dataframe API specification - to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. - If it is ``None``, it should return the namespace corresponding to - latest version of the dataframe API specification. If the given - version is invalid or not implemented for the given module, an - error should be raised. Default: ``None``. - - Returns - ------- - namespace: Any - An object representing the dataframe API namespace. It should have - every top-level function defined in the specification as an - attribute. It may contain other public names as well, but it is - recommended to only include those names that are part of the - specification. - - """ - - @property - def column(self) -> object: - """ - Return underlying (not-necessarily-Standard-compliant) column. - - If a library only implements the Standard, then this can return `self`. - """ - ... - - def __len__(self) -> int: - """ - Return the number of rows. - """ - - def __iter__(self) -> NoReturn: - """ - Iterate over elements. - - This is intentionally "poisoned" to discourage inefficient code patterns. - - Raises - ------ - NotImplementedError - """ - raise NotImplementedError("'__iter__' is intentionally not implemented.") - - @property - def dtype(self) -> DType: - """ - Return data type of column. - """ - - def get_rows(self, indices: Column[int]) -> Column: - """ - Select a subset of rows, similar to `ndarray.take`. - - Parameters - ---------- - indices : Column[int] - Positions of rows to select. - """ - ... - - def get_value(self, row_number: int) -> Scalar: - """ - Select the value at a row number, similar to `ndarray.__getitem__()`. - - Parameters - ---------- - row_number : int - Row number of value to return. - - Returns - ------- - Scalar - Depends on the dtype of the Column, and may vary - across implementations. - """ - ... - - def sorted_indices( - self, - *, - ascending: bool = True, - nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[int]: - """ - Return row numbers which would sort column. - - If you need to sort the Column, you can simply do:: - - col.get_rows(col.sorted_indices()) - - Parameters - ---------- - ascending : bool - If `True`, sort in ascending order. - If `False`, sort in descending order. 
- nulls_position : ``{'first', 'last'}`` - Whether null values should be placed at the beginning - or at the end of the result. - Note that the position of NaNs is unspecified and may - vary based on the implementation. - - Returns - ------- - Column[int] - """ - ... - - def __eq__(self, other: Column | Scalar) -> Column: - """ - Compare for equality. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __ne__(self, other: Column | Scalar) -> Column: - """ - Compare for non-equality. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __ge__(self, other: Column | Scalar) -> Column: - """ - Compare for "greater than or equal to" `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __gt__(self, other: Column | Scalar) -> Column: - """ - Compare for "greater than" `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __le__(self, other: Column | Scalar) -> Column: - """ - Compare for "less than or equal to" `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __lt__(self, other: Column | Scalar) -> Column: - """ - Compare for "less than" `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __and__(self, other: Column[bool] | bool) -> Column[bool]: - """ - Apply logical 'and' to `other` Column (or scalar) and this Column. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : Column[bool] or bool - If Column, must have same length. - - Returns - ------- - Column - - Raises - ------ - ValueError - If `self` or `other` is not boolean. - """ - - def __or__(self, other: Column[bool] | bool) -> Column[bool]: - """ - Apply logical 'or' to `other` Column (or scalar) and this column. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : Column[bool] or Scalar - If Column, must have same length. - - Returns - ------- - Column[bool] - - Raises - ------ - ValueError - If `self` or `other` is not boolean. - """ - - def __add__(self, other: Column | Scalar) -> Column: - """ - Add `other` column or scalar to this column. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. 
- - Returns - ------- - Column - """ - - def __sub__(self, other: Column | Scalar) -> Column: - """ - Subtract `other` column or scalar from this column. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __mul__(self, other: Column | Scalar) -> Column: - """ - Multiply `other` column or scalar with this column. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __truediv__(self, other: Column | Scalar) -> Column: - """ - Divide this column by `other` column or scalar. True division, returns floats. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __floordiv__(self, other: Column | Scalar) -> Column: - """ - Floor-divide `other` column or scalar to this column. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __pow__(self, other: Column | Scalar) -> Column: - """ - Raise this column to the power of `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __mod__(self, other: Column | Scalar) -> Column: - """ - Returns modulus of this column by `other` (`%` operator). - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __divmod__(self, other: Column | Scalar) -> tuple[Column, Column]: - """ - Return quotient and remainder of integer division. See `divmod` builtin function. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __invert__(self) -> Column: - """ - Invert truthiness of (boolean) elements. - - Raises - ------ - ValueError - If any of the Column's columns is not boolean. - """ - - def any(self, *, skip_nulls: bool = True) -> bool: - """ - Reduction returns a bool. - - Raises - ------ - ValueError - If column is not boolean. - """ - - def all(self, *, skip_nulls: bool = True) -> bool: - """ - Reduction returns a bool. - - Raises - ------ - ValueError - If column is not boolean. - """ - - def min(self, *, skip_nulls: bool = True) -> Scalar: - """ - Reduction returns a scalar. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. - """ - - def max(self, *, skip_nulls: bool = True) -> Scalar: - """ - Reduction returns a scalar. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. 
- """ - - def sum(self, *, skip_nulls: bool = True) -> Scalar: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. The returned value has the same dtype as the - column. - """ - - def prod(self, *, skip_nulls: bool = True) -> Scalar: - """ - Reduction returns a scalar. Must be supported for numerical data types. - The returned value has the same dtype as the column. - """ - - def median(self, *, skip_nulls: bool = True) -> Scalar: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. Returns a float for numerical data types, and - datetime (with the appropriate timedelta format string) for datetime - dtypes. - """ - - def mean(self, *, skip_nulls: bool = True) -> Scalar: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. Returns a float for numerical data types, and - datetime (with the appropriate timedelta format string) for datetime - dtypes. - """ - - def std(self, *, skip_nulls: bool = True) -> Scalar: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. Returns a float for numerical data types, and - datetime (with the appropriate timedelta format string) for datetime - dtypes. - """ - - def var(self, *, skip_nulls: bool = True) -> Scalar: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. Returns a float for numerical data types, and - datetime (with the appropriate timedelta format string) for datetime - dtypes. - """ - - def is_null(self) -> Column: - """ - Check for 'missing' or 'null' entries. - - Returns - ------- - Column - - See also - -------- - is_nan - - Notes - ----- - Does *not* include NaN-like entries. - May optionally include 'NaT' values (if present in an implementation), - but note that the Standard makes no guarantees about them. - """ - - def is_nan(self) -> Column: - """ - Check for nan entries. - - Returns - ------- - Column - - See also - -------- - is_null - - Notes - ----- - This only checks for 'NaN'. - Does *not* include 'missing' or 'null' entries. - In particular, does not check for `np.timedelta64('NaT')`. - """ - - def is_in(self, values: Column) -> Column[bool]: - """ - Indicate whether the value at each row matches any value in `values`. - - Parameters - ---------- - values : Column - Contains values to compare against. May include ``float('nan')`` and - ``null``, in which case ``'nan'`` and ``null`` will - respectively return ``True`` even though ``float('nan') == float('nan')`` - isn't ``True``. - The dtype of ``values`` must match the current column's dtype. - - Returns - ------- - Column[bool] - """ - - def unique_indices(self, *, skip_nulls: bool = True) -> Column[int]: - """ - Return indices corresponding to unique values in Column. - - Returns - ------- - Column[int] - Indices corresponding to unique values. - - Notes - ----- - There are no ordering guarantees. In particular, if there are multiple - indices corresponding to the same unique value, there is no guarantee - about which one will appear in the result. - If the original Column contains multiple `'NaN'` values, then - only a single index corresponding to those values should be returned. - Likewise for null values (if ``skip_nulls=False``). - To get the unique values, you can do ``col.get_rows(col.unique_indices())``. - """ - ... - - def fill_nan(self, value: float | 'null', /) -> Column: - """ - Fill floating point ``nan`` values with the given fill value. 
- - Parameters - ---------- - value : float or `null` - Value used to replace any ``nan`` in the column with. Must be - of the Python scalar type matching the dtype of the column (or - be `null`). - - """ - ... diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py deleted file mode 100644 index 6dc2c787..00000000 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ /dev/null @@ -1,754 +0,0 @@ -from __future__ import annotations - -from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn - - -if TYPE_CHECKING: - from .column_object import Column - from .groupby_object import GroupBy - from ._types import Scalar - - -__all__ = ["DataFrame"] - - -class DataFrame: - """ - DataFrame object - - Note that this dataframe object is not meant to be instantiated directly by - users of the library implementing the dataframe API standard. Rather, use - constructor functions or an already-created dataframe object retrieved via - - **Python operator support** - - All arithmetic operators defined by the Python language, except for - ``__matmul__``, ``__neg__`` and ``__pos__``, must be supported for - numerical data types. - - All comparison operators defined by the Python language must be supported - by the dataframe object for all data types for which those comparisons are - supported by the builtin scalar types corresponding to a data type. - - In-place operators must not be supported. All operations on the dataframe - object are out-of-place. - - **Methods and Attributes** - - """ - def __dataframe_namespace__( - self: DataFrame, /, *, api_version: str | None = None - ) -> Any: - """ - Returns an object that has all the dataframe API functions on it. - - Parameters - ---------- - api_version: Optional[str] - String representing the version of the dataframe API specification - to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. - If it is ``None``, it should return the namespace corresponding to - latest version of the dataframe API specification. If the given - version is invalid or not implemented for the given module, an - error should be raised. Default: ``None``. - - Returns - ------- - namespace: Any - An object representing the dataframe API namespace. It should have - every top-level function defined in the specification as an - attribute. It may contain other public names as well, but it is - recommended to only include those names that are part of the - specification. - - """ - - @property - def dataframe(self) -> object: - """ - Return underlying (not-necessarily-Standard-compliant) DataFrame. - - If a library only implements the Standard, then this can return `self`. - """ - ... - - def shape(self) -> tuple[int, int]: - """ - Return number of rows and number of columns. - """ - - def groupby(self, keys: Sequence[str], /) -> GroupBy: - """ - Group the DataFrame by the given columns. - - Parameters - ---------- - keys : Sequence[str] - - Returns - ------- - GroupBy - - Raises - ------ - KeyError - If any of the requested keys are not present. - - Notes - ----- - Downstream operations from this function, like aggregations, return - results for which row order is not guaranteed and is implementation - defined. - """ - ... - - def get_column_by_name(self, name: str, /) -> Column: - """ - Select a column by name. - - Parameters - ---------- - name : str - - Returns - ------- - Column - - Raises - ------ - KeyError - If the key is not present. - """ - ... 
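As a rough usage sketch of the DataFrame methods above (``df`` is any Standard-compliant DataFrame; the column names ``"store"`` and ``"sales"`` are made-up placeholders, and the group-wise ``sum`` aggregation is specified later in this patch)::

    n_rows, n_cols = df.shape()

    # Group-wise reduction; row order of the result is implementation-defined.
    per_store = df.groupby(["store"]).sum()

    # Single-column access; raises KeyError if the name is absent.
    sales = df.get_column_by_name("sales")

    # The namespace object exposes top-level functions such as concat.
    ns = df.__dataframe_namespace__()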
- - def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame: - """ - Select multiple columns by name. - - Parameters - ---------- - names : Sequence[str] - - Returns - ------- - DataFrame - - Raises - ------ - KeyError - If the any requested key is not present. - """ - ... - - def get_rows(self, indices: "Column[int]") -> DataFrame: - """ - Select a subset of rows, similar to `ndarray.take`. - - Parameters - ---------- - indices : Column[int] - Positions of rows to select. - - Returns - ------- - DataFrame - """ - ... - - def slice_rows( - self, start: int | None, stop: int | None, step: int | None - ) -> DataFrame: - """ - Select a subset of rows corresponding to a slice. - - Parameters - ---------- - start : int or None - stop : int or None - step : int or None - - Returns - ------- - DataFrame - """ - ... - - def get_rows_by_mask(self, mask: "Column[bool]") -> DataFrame: - """ - Select a subset of rows corresponding to a mask. - - Parameters - ---------- - mask : Column[bool] - - Returns - ------- - DataFrame - - Notes - ----- - Some participants preferred a weaker type Arraylike[bool] for mask, - where 'Arraylike' denotes an object adhering to the Array API standard. - """ - ... - - def insert(self, loc: int, label: str, value: Column) -> DataFrame: - """ - Insert column into DataFrame at specified location. - - Parameters - ---------- - loc : int - Insertion index. Must verify 0 <= loc <= len(columns). - label : str - Label of the inserted column. - value : Column - """ - ... - - def drop_column(self, label: str) -> DataFrame: - """ - Drop the specified column. - - Parameters - ---------- - label : str - - Returns - ------- - DataFrame - - Raises - ------ - KeyError - If the label is not present. - """ - ... - - def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame: - """ - Rename columns. - - Parameters - ---------- - mapping : Mapping[str, str] - Keys are old column names, values are new column names. - - Returns - ------- - DataFrame - """ - ... - - def get_column_names(self) -> Sequence[str]: - """ - Get column names. - - Returns - ------- - Sequence[str] - """ - ... - - def sorted_indices( - self, - keys: Sequence[str], - *, - ascending: Sequence[bool] | bool = True, - nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[int]: - """ - Return row numbers which would sort according to given columns. - - If you need to sort the DataFrame, you can simply do:: - - df.get_rows(df.sorted_indices(keys)) - - Parameters - ---------- - keys : Sequence[str] - Names of columns to sort by. - ascending : Sequence[bool] or bool - If `True`, sort by all keys in ascending order. - If `False`, sort by all keys in descending order. - If a sequence, it must be the same length as `keys`, - and determines the direction with which to use each - key to sort by. - nulls_position : ``{'first', 'last'}`` - Whether null values should be placed at the beginning - or at the end of the result. - Note that the position of NaNs is unspecified and may - vary based on the implementation. - - Returns - ------- - Column[int] - - Raises - ------ - ValueError - If `keys` and `ascending` are sequences of different lengths. - """ - ... - - def __eq__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Compare for equality. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. 
- "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __ne__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Compare for non-equality. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __ge__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Compare for "greater than or equal to" `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __gt__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Compare for "greater than" `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __le__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Compare for "less than or equal to" `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __lt__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Compare for "less than" `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __and__(self, other: DataFrame[bool] | bool) -> DataFrame[bool]: - """ - Apply logical 'and' to `other` DataFrame (or scalar) and this dataframe. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : DataFrame[bool] or bool - If DataFrame, must have same length. - - Returns - ------- - DataFrame[bool] - - Raises - ------ - ValueError - If `self` or `other` is not boolean. - """ - - def __or__(self, other: DataFrame[bool] | bool) -> DataFrame[bool]: - """ - Apply logical 'or' to `other` DataFrame (or scalar) and this DataFrame. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : DataFrame[bool] or bool - If DataFrame, must have same length. - - Returns - ------- - DataFrame[bool] - - Raises - ------ - ValueError - If `self` or `other` is not boolean. - """ - - def __add__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Add `other` dataframe or scalar to this dataframe. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __sub__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Subtract `other` dataframe or scalar from this dataframe. 
- - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __mul__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Multiply `other` dataframe or scalar with this dataframe. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __truediv__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Divide this dataframe by `other` dataframe or scalar. True division, returns floats. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __floordiv__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __pow__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Raise this dataframe to the power of `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __mod__(self, other: DataFrame | Scalar) -> DataFrame: - """ - Return modulus of this dataframe by `other` (`%` operator). - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __divmod__(self, other: DataFrame | Scalar) -> tuple[DataFrame, DataFrame]: - """ - Return quotient and remainder of integer division. See `divmod` builtin function. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - A tuple of two DataFrame's - """ - ... - - def __invert__(self) -> DataFrame: - """ - Invert truthiness of (boolean) elements. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def __iter__(self) -> NoReturn: - """ - Iterate over elements. - - This is intentionally "poisoned" to discourage inefficient code patterns. - - Raises - ------ - NotImplementedError - """ - raise NotImplementedError("'__iter__' is intentionally not implemented.") - - def any(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... 
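A hedged sketch of how the comparison, logical, filtering and reduction methods above compose (``df`` is any conforming DataFrame; ``"price"``, ``"on_sale"`` and ``"in_stock"`` are placeholder column names, the latter two assumed boolean)::

    prices = df.get_column_by_name("price")
    on_sale = df.get_column_by_name("on_sale")

    # Element-wise comparison plus Kleene-logic combination yields a boolean Column.
    mask = (prices > 100) & ~on_sale

    # Filter rows with the mask, then reduce: ``any`` returns a 1-row DataFrame
    # and requires every remaining column to be boolean.
    flagged = df.get_rows_by_mask(mask)
    any_flagged = flagged.get_columns_by_name(["in_stock"]).any(skip_nulls=True)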
- - def all(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def any_rowwise(self, *, skip_nulls: bool = True) -> Column: - """ - Reduction returns a Column. - - Differs from ``DataFrame.any`` and that the reduction happens - for each row, rather than for each column. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def all_rowwise(self, *, skip_nulls: bool = True) -> Column: - """ - Reduction returns a Column. - - Differs from ``DataFrame.all`` and that the reduction happens - for each row, rather than for each column. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def min(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def max(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def sum(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def prod(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def median(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def mean(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def std(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def var(self, *, skip_nulls: bool = True) -> DataFrame: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def is_null(self) -> DataFrame: - """ - Check for 'missing' or 'null' entries. - - Returns - ------- - DataFrame - - See also - -------- - is_nan - - Notes - ----- - Does *not* include NaN-like entries. - May optionally include 'NaT' values (if present in an implementation), - but note that the Standard makes no guarantees about them. - """ - ... - - def is_nan(self) -> DataFrame: - """ - Check for nan entries. - - Returns - ------- - DataFrame - - See also - -------- - is_null - - Notes - ----- - This only checks for 'NaN'. - Does *not* include 'missing' or 'null' entries. - In particular, does not check for `np.timedelta64('NaT')`. - """ - ... - - def fill_nan(self, value: float | 'null', /) -> DataFrame: - """ - Fill ``nan`` values with the given fill value. - - The fill operation will apply to all columns with a floating-point - dtype. Other columns remain unchanged. - - Parameters - ---------- - value : float or `null` - Value used to replace any ``nan`` in the column with. Must be - of the Python scalar type matching the dtype of the column (or - be `null`). - - """ - ... diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py deleted file mode 100644 index cfc7bc62..00000000 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from .dataframe_object import DataFrame - - -__all__ = ['GroupBy'] - - -class GroupBy: - """ - GroupBy object. - - Note that this class is not meant to be constructed by users. - It is returned from `DataFrame.groupby`. - - **Methods** - - """ - def any(self, *, skip_nulls: bool = True) -> "DataFrame": - ... 
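For the NaN/null helpers and column-wise reductions above, a minimal sketch (assuming ``df`` holds floating-point columns; the fill value ``0.0`` is arbitrary)::

    # Only floating-point columns are affected by fill_nan; others pass through.
    cleaned = df.fill_nan(0.0)

    # Boolean DataFrames marking missing values and NaNs, respectively.
    null_mask = df.is_null()
    nan_mask = df.is_nan()

    # Each reduction collapses the frame to a single row.
    means = cleaned.mean(skip_nulls=True)
    spreads = cleaned.std(skip_nulls=True)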
- - def all(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def min(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def max(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def sum(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def prod(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def median(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def mean(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def std(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def var(self, *, skip_nulls: bool = True) -> "DataFrame": - ... - - def size(self) -> "DataFrame": - ... From da5324e7646f212b25370f0a2019d6f3e718b5e2 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 09:04:58 +0100 Subject: [PATCH 02/38] get mypy --strict passing --- .../dataframe_api/__init__.pyi | 195 +++++ .../dataframe_api/_types.pyi | 63 ++ .../dataframe_api/column_object.pyi | 586 ++++++++++++++ .../dataframe_api/dataframe_object.pyi | 756 ++++++++++++++++++ .../dataframe_api/groupby_object.pyi | 54 ++ 5 files changed, 1654 insertions(+) create mode 100644 spec/API_specification/dataframe_api/__init__.pyi create mode 100644 spec/API_specification/dataframe_api/_types.pyi create mode 100644 spec/API_specification/dataframe_api/column_object.pyi create mode 100644 spec/API_specification/dataframe_api/dataframe_object.pyi create mode 100644 spec/API_specification/dataframe_api/groupby_object.pyi diff --git a/spec/API_specification/dataframe_api/__init__.pyi b/spec/API_specification/dataframe_api/__init__.pyi new file mode 100644 index 00000000..ed0d089c --- /dev/null +++ b/spec/API_specification/dataframe_api/__init__.pyi @@ -0,0 +1,195 @@ +""" +Function stubs and API documentation for the DataFrame API standard. +""" +from __future__ import annotations + +from typing import Mapping, Sequence, Any, Generic, TypeVar + +from .column_object import * +from .dataframe_object import DataFrame +from .groupby_object import * + +T = TypeVar("T", bound=DType) + +__all__ = [ + "__dataframe_api_version", + "column_from_sequence", + "concat", + "dataframe_from_dict", + "is_null", + "null", + "DType", + "Int64", + "Int32", + "Int16", + "Int8", + "UInt64", + "UInt32", + "UInt16", + "UInt8", + "Float64", + "Float32", + "Bool", +] + + +__dataframe_api_version__: str = "YYYY.MM" +""" +String representing the version of the DataFrame API specification to which +the conforming implementation adheres. Set to a concrete value for a stable +implementation of the dataframe API standard. +""" + +def concat(dataframes: Sequence[DataFrame[Any]]) -> DataFrame[Any]: + """ + Concatenate DataFrames vertically. + + To concatenate horizontally, please use ``insert``. + + Parameters + ---------- + dataframes : Sequence[DataFrame] + DataFrames to concatenate. + Column names, ordering, and dtypes must match. + + Notes + ----- + The order in which the input DataFrames appear in + the output is preserved (so long as the DataFrame implementation supports row + ordering). + """ + ... + +def column_from_sequence(sequence: Sequence[Scalar[DType]], *, dtype: DType) -> Column[DType]: + """ + Construct Column from sequence of elements. + + Parameters + ---------- + sequence : Sequence[object] + Sequence of elements. Each element must be of the specified + ``dtype``, the corresponding Python builtin scalar type, or + coercible to that Python scalar type. + dtype : DType + Dtype of result. Must be specified. 
+ + Returns + ------- + Column + """ + ... + +def dataframe_from_dict(data: Mapping[str, Column[Any]]) -> DataFrame[Any]: + """ + Construct DataFrame from map of column names to Columns. + + Parameters + ---------- + data : Mapping[str, Column] + Column must be of the corresponding type of the DataFrame. + For example, it is only supported to build a ``LibraryXDataFrame`` using + ``LibraryXColumn`` instances. + + Returns + ------- + DataFrame + """ + ... + +class null: + """ + A `null` object to represent missing data. + + ``null`` is a scalar, and may be used when constructing a `Column` from a + Python sequence with `column_from_sequence`. It does not support ``is``, + ``==`` or ``bool``. + + Raises + ------ + TypeError + From ``__eq__`` and from ``__bool__``. + + For ``__eq__``: a missing value must not be compared for equality + directly. Instead, use `DataFrame.is_null` or `Column.is_null` to check + for presence of missing values. + + For ``__bool__``: truthiness of a missing value is ambiguous. + + Notes + ----- + Like for Python scalars, the ``null`` object may be duck typed so it can + reside on (e.g.) a GPU. Hence, the builtin ``is`` keyword should not be + used to check if an object *is* the ``null`` object. + + """ + ... + +def is_null(value: object, /) -> bool: + """ + Check if an object is a `null` scalar. + + Parameters + ---------- + value : object + Any input type is valid. + + Returns + ------- + bool + True if the input is a `null` object from the same library which + implements the dataframe API standard, False otherwise. + + """ + +########## +# Dtypes # +########## + +class DType: + """Base class for all dtypes.""" + +class IntDType(DType): + """Base class for all integer dtypes.""" + +class FloatDType(DType): + """Base class for all float dtypes.""" + +class Int64(IntDType): + """Integer type with 64 bits of precision.""" + +class Int32(IntDType): + """Integer type with 32 bits of precision.""" + +class Int16(IntDType): + """Integer type with 16 bits of precision.""" + +class Int8(IntDType): + """Integer type with 8 bits of precision.""" + +class UInt64(IntDType): + """Unsigned integer type with 64 bits of precision.""" + +class UInt32(IntDType): + """Unsigned integer type with 32 bits of precision.""" + +class UInt16(IntDType): + """Unsigned integer type with 16 bits of precision.""" + +class UInt8(IntDType): + """Unsigned integer type with 8 bits of precision.""" + +class Float64(FloatDType): + """Floating point type with 64 bits of precision.""" + +class Float32(FloatDType): + """Floating point type with 32 bits of precision.""" + +class Bool(DType): + """Boolean type with 8 bits of precision.""" + +########## +# Scalar # +########## + +class Scalar(Generic[T]): + ... diff --git a/spec/API_specification/dataframe_api/_types.pyi b/spec/API_specification/dataframe_api/_types.pyi new file mode 100644 index 00000000..2874ba4c --- /dev/null +++ b/spec/API_specification/dataframe_api/_types.pyi @@ -0,0 +1,63 @@ +""" +Types for type annotations used in the dataframe API standard. + +The type variables should be replaced with the actual types for a given +library, e.g., for Pandas TypeVar('DataFrame') would be replaced with pd.DataFrame. 
+""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import ( + Any, + List, + Literal, + Optional, + Sequence, + Tuple, + TypeVar, + Union, + Protocol, +) +from enum import Enum + +array = TypeVar("array") +Scalar = TypeVar("Scalar") +device = TypeVar("device") +DType = TypeVar("DType") +SupportsDLPack = TypeVar("SupportsDLPack") +SupportsBufferProtocol = TypeVar("SupportsBufferProtocol") +PyCapsule = TypeVar("PyCapsule") +# ellipsis cannot actually be imported from anywhere, so include a dummy here +# to keep pyflakes happy. https://github.com/python/typeshed/issues/3556 +ellipsis = TypeVar("ellipsis") + +_T_co = TypeVar("_T_co", covariant=True) + + +class NestedSequence(Protocol[_T_co]): + def __getitem__(self, key: int, /) -> Union[_T_co, NestedSequence[_T_co]]: + ... + + def __len__(self, /) -> int: + ... + + +__all__ = [ + "Any", + "DataFrame", + "List", + "Literal", + "NestedSequence", + "Optional", + "PyCapsule", + "SupportsBufferProtocol", + "SupportsDLPack", + "Tuple", + "Union", + "Sequence", + "array", + "device", + "DType", + "ellipsis", + "Enum", +] diff --git a/spec/API_specification/dataframe_api/column_object.pyi b/spec/API_specification/dataframe_api/column_object.pyi new file mode 100644 index 00000000..a06c993a --- /dev/null +++ b/spec/API_specification/dataframe_api/column_object.pyi @@ -0,0 +1,586 @@ +from __future__ import annotations + +from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal, Generic, TypeVar + +if TYPE_CHECKING: + from . import DType, IntDType, FloatDType, Bool, null, Scalar + +T = TypeVar('T', bound=DType) + +__all__ = ['Column'] + + +class Column(Generic[T]): + """ + Column object + + Note that this column object is not meant to be instantiated directly by + users of the library implementing the dataframe API standard. Rather, use + constructor functions or an already-created dataframe object retrieved via + + """ + + def __column_namespace__( + self, /, *, api_version: str | None = None + ) -> Any: + """ + Returns an object that has all the Dataframe Standard API functions on it. + + Parameters + ---------- + api_version: Optional[str] + String representing the version of the dataframe API specification + to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. + If it is ``None``, it should return the namespace corresponding to + latest version of the dataframe API specification. If the given + version is invalid or not implemented for the given module, an + error should be raised. Default: ``None``. + + Returns + ------- + namespace: Any + An object representing the dataframe API namespace. It should have + every top-level function defined in the specification as an + attribute. It may contain other public names as well, but it is + recommended to only include those names that are part of the + specification. + + """ + + @property + def column(self) -> object: + """ + Return underlying (not-necessarily-Standard-compliant) column. + + If a library only implements the Standard, then this can return `self`. + """ + ... + + def __len__(self) -> int: + """ + Return the number of rows. + """ + + def __iter__(self) -> NoReturn: + """ + Iterate over elements. + + This is intentionally "poisoned" to discourage inefficient code patterns. + + Raises + ------ + NotImplementedError + """ + raise NotImplementedError("'__iter__' is intentionally not implemented.") + + @property + def dtype(self) -> DType: + """ + Return data type of column. 
+ """ + + def get_rows(self, indices: Column[IntDType]) -> Column[T]: + """ + Select a subset of rows, similar to `ndarray.take`. + + Parameters + ---------- + indices : Column[IntDType] + Positions of rows to select. + """ + ... + + def get_value(self, row_number: int) -> Scalar[T]: + """ + Select the value at a row number, similar to `ndarray.__getitem__()`. + + Parameters + ---------- + row_number : int + Row number of value to return. + + Returns + ------- + Scalar + Depends on the dtype of the Column, and may vary + across implementations. + """ + ... + + def sorted_indices( + self, + *, + ascending: bool = True, + nulls_position: Literal['first', 'last'] = 'last', + ) -> Column[IntDType]: + """ + Return row numbers which would sort column. + + If you need to sort the Column, you can simply do:: + + col.get_rows(col.sorted_indices()) + + Parameters + ---------- + ascending : bool + If `True`, sort in ascending order. + If `False`, sort in descending order. + nulls_position : ``{'first', 'last'}`` + Whether null values should be placed at the beginning + or at the end of the result. + Note that the position of NaNs is unspecified and may + vary based on the implementation. + + Returns + ------- + Column[IntDType] + """ + ... + + def __eq__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: ignore[override] + """ + Compare for equality. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __ne__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: ignore[override] + """ + Compare for non-equality. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __ge__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Compare for "greater than or equal to" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __gt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Compare for "greater than" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __le__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Compare for "less than or equal to" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __lt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Compare for "less than" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. 
+ + Returns + ------- + Column + """ + + def __and__(self, other: Column[Bool] | bool) -> Column[Bool]: + """ + Apply logical 'and' to `other` Column (or scalar) and this Column. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : Column[bool] or bool + If Column, must have same length. + + Returns + ------- + Column + + Raises + ------ + ValueError + If `self` or `other` is not boolean. + """ + + def __or__(self, other: Column[Bool] | bool) -> Column[Bool]: + """ + Apply logical 'or' to `other` Column (or scalar) and this column. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : Column[bool] or Scalar + If Column, must have same length. + + Returns + ------- + Column[bool] + + Raises + ------ + ValueError + If `self` or `other` is not boolean. + """ + + def __add__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Add `other` column or scalar to this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __sub__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Subtract `other` column or scalar from this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __mul__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Multiply `other` column or scalar with this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Divide this column by `other` column or scalar. True division, returns floats. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Floor-divide `other` column or scalar to this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Raise this column to the power of `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __mod__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Returns modulus of this column by `other` (`%` operator). + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. 
+ + Returns + ------- + Column + """ + + def __divmod__(self, other: Column[Any] | Scalar[Any]) -> tuple[Column[IntDType], Column[IntDType]]: + """ + Return quotient and remainder of integer division. See `divmod` builtin function. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __invert__(self) -> Column[Bool]: + """ + Invert truthiness of (boolean) elements. + + Raises + ------ + ValueError + If any of the Column's columns is not boolean. + """ + + def any(self, *, skip_nulls: bool = True) -> bool: + """ + Reduction returns a bool. + + Raises + ------ + ValueError + If column is not boolean. + """ + + def all(self, *, skip_nulls: bool = True) -> bool: + """ + Reduction returns a bool. + + Raises + ------ + ValueError + If column is not boolean. + """ + + def min(self, *, skip_nulls: bool = True) -> Scalar[T]: + """ + Reduction returns a scalar. Any data type that supports comparisons + must be supported. The returned value has the same dtype as the column. + """ + + def max(self, *, skip_nulls: bool = True) -> Scalar[T]: + """ + Reduction returns a scalar. Any data type that supports comparisons + must be supported. The returned value has the same dtype as the column. + """ + + def sum(self, *, skip_nulls: bool = True) -> Scalar[T]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. The returned value has the same dtype as the + column. + """ + + def prod(self, *, skip_nulls: bool = True) -> Scalar[T]: + """ + Reduction returns a scalar. Must be supported for numerical data types. + The returned value has the same dtype as the column. + """ + + def median(self, *, skip_nulls: bool = True) -> Scalar[Any]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. + """ + + def mean(self, *, skip_nulls: bool = True) -> Scalar[Any]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. + """ + + def std(self, *, skip_nulls: bool = True) -> Scalar[Any]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. + """ + + def var(self, *, skip_nulls: bool = True) -> Scalar[Any]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. + """ + + def is_null(self) -> Column[Bool]: + """ + Check for 'missing' or 'null' entries. + + Returns + ------- + Column + + See also + -------- + is_nan + + Notes + ----- + Does *not* include NaN-like entries. + May optionally include 'NaT' values (if present in an implementation), + but note that the Standard makes no guarantees about them. + """ + + def is_nan(self) -> Column[Bool]: + """ + Check for nan entries. + + Returns + ------- + Column + + See also + -------- + is_null + + Notes + ----- + This only checks for 'NaN'. 
+ Does *not* include 'missing' or 'null' entries. + In particular, does not check for `np.timedelta64('NaT')`. + """ + + def is_in(self, values: Column[T]) -> Column[Bool]: + """ + Indicate whether the value at each row matches any value in `values`. + + Parameters + ---------- + values : Column + Contains values to compare against. May include ``float('nan')`` and + ``null``, in which case ``'nan'`` and ``null`` will + respectively return ``True`` even though ``float('nan') == float('nan')`` + isn't ``True``. + The dtype of ``values`` must match the current column's dtype. + + Returns + ------- + Column[bool] + """ + + def unique_indices(self, *, skip_nulls: bool = True) -> Column[IntDType]: + """ + Return indices corresponding to unique values in Column. + + Returns + ------- + Column[IntDType] + Indices corresponding to unique values. + + Notes + ----- + There are no ordering guarantees. In particular, if there are multiple + indices corresponding to the same unique value, there is no guarantee + about which one will appear in the result. + If the original Column contains multiple `'NaN'` values, then + only a single index corresponding to those values should be returned. + Likewise for null values (if ``skip_nulls=False``). + To get the unique values, you can do ``col.get_rows(col.unique_indices())``. + """ + ... + + def fill_nan(self, value: float | 'null', /) -> Column[T]: + """ + Fill floating point ``nan`` values with the given fill value. + + Parameters + ---------- + value : float or `null` + Value used to replace any ``nan`` in the column with. Must be + of the Python scalar type matching the dtype of the column (or + be `null`). + + """ + ... diff --git a/spec/API_specification/dataframe_api/dataframe_object.pyi b/spec/API_specification/dataframe_api/dataframe_object.pyi new file mode 100644 index 00000000..14a1f29c --- /dev/null +++ b/spec/API_specification/dataframe_api/dataframe_object.pyi @@ -0,0 +1,756 @@ +from __future__ import annotations + +from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn, TypeVar, Generic + + +if TYPE_CHECKING: + from .column_object import Column + from .groupby_object import GroupBy + from . import DType, IntDType, FloatDType, Bool, null, Scalar + + +__all__ = ["DataFrame"] + +T = TypeVar("T", bound=DType) + + +class DataFrame(Generic[T]): + """ + DataFrame object + + Note that this dataframe object is not meant to be instantiated directly by + users of the library implementing the dataframe API standard. Rather, use + constructor functions or an already-created dataframe object retrieved via + + **Python operator support** + + All arithmetic operators defined by the Python language, except for + ``__matmul__``, ``__neg__`` and ``__pos__``, must be supported for + numerical data types. + + All comparison operators defined by the Python language must be supported + by the dataframe object for all data types for which those comparisons are + supported by the builtin scalar types corresponding to a data type. + + In-place operators must not be supported. All operations on the dataframe + object are out-of-place. + + **Methods and Attributes** + + """ + def __dataframe_namespace__( + self, /, *, api_version: str | None = None + ) -> Any: + """ + Returns an object that has all the dataframe API functions on it. + + Parameters + ---------- + api_version: Optional[str] + String representing the version of the dataframe API specification + to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. 
+ If it is ``None``, it should return the namespace corresponding to + latest version of the dataframe API specification. If the given + version is invalid or not implemented for the given module, an + error should be raised. Default: ``None``. + + Returns + ------- + namespace: Any + An object representing the dataframe API namespace. It should have + every top-level function defined in the specification as an + attribute. It may contain other public names as well, but it is + recommended to only include those names that are part of the + specification. + + """ + + @property + def dataframe(self) -> object: + """ + Return underlying (not-necessarily-Standard-compliant) DataFrame. + + If a library only implements the Standard, then this can return `self`. + """ + ... + + def shape(self) -> tuple[int, int]: + """ + Return number of rows and number of columns. + """ + + def groupby(self, keys: Sequence[str], /) -> GroupBy: + """ + Group the DataFrame by the given columns. + + Parameters + ---------- + keys : Sequence[str] + + Returns + ------- + GroupBy + + Raises + ------ + KeyError + If any of the requested keys are not present. + + Notes + ----- + Downstream operations from this function, like aggregations, return + results for which row order is not guaranteed and is implementation + defined. + """ + ... + + def get_column_by_name(self, name: str, /) -> Column[T]: + """ + Select a column by name. + + Parameters + ---------- + name : str + + Returns + ------- + Column + + Raises + ------ + KeyError + If the key is not present. + """ + ... + + def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame[T]: + """ + Select multiple columns by name. + + Parameters + ---------- + names : Sequence[str] + + Returns + ------- + DataFrame + + Raises + ------ + KeyError + If the any requested key is not present. + """ + ... + + def get_rows(self, indices: Column[IntDType]) -> DataFrame[T]: + """ + Select a subset of rows, similar to `ndarray.take`. + + Parameters + ---------- + indices : Column[IntDType] + Positions of rows to select. + + Returns + ------- + DataFrame + """ + ... + + def slice_rows( + self, start: int | None, stop: int | None, step: int | None + ) -> DataFrame[T]: + """ + Select a subset of rows corresponding to a slice. + + Parameters + ---------- + start : int or None + stop : int or None + step : int or None + + Returns + ------- + DataFrame + """ + ... + + def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame[T]: + """ + Select a subset of rows corresponding to a mask. + + Parameters + ---------- + mask : Column[bool] + + Returns + ------- + DataFrame + + Notes + ----- + Some participants preferred a weaker type Arraylike[bool] for mask, + where 'Arraylike' denotes an object adhering to the Array API standard. + """ + ... + + def insert(self, loc: int, label: str, value: Column[Any]) -> DataFrame[Any]: + """ + Insert column into DataFrame at specified location. + + Parameters + ---------- + loc : int + Insertion index. Must verify 0 <= loc <= len(columns). + label : str + Label of the inserted column. + value : Column + """ + ... + + def drop_column(self, label: str) -> DataFrame[T]: + """ + Drop the specified column. + + Parameters + ---------- + label : str + + Returns + ------- + DataFrame + + Raises + ------ + KeyError + If the label is not present. + """ + ... + + def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame[T]: + """ + Rename columns. 
+ + Parameters + ---------- + mapping : Mapping[str, str] + Keys are old column names, values are new column names. + + Returns + ------- + DataFrame + """ + ... + + def get_column_names(self) -> Sequence[str]: + """ + Get column names. + + Returns + ------- + Sequence[str] + """ + ... + + def sorted_indices( + self, + keys: Sequence[str], + *, + ascending: Sequence[bool] | bool = True, + nulls_position: Literal['first', 'last'] = 'last', + ) -> Column[IntDType]: + """ + Return row numbers which would sort according to given columns. + + If you need to sort the DataFrame, you can simply do:: + + df.get_rows(df.sorted_indices(keys)) + + Parameters + ---------- + keys : Sequence[str] + Names of columns to sort by. + ascending : Sequence[bool] or bool + If `True`, sort by all keys in ascending order. + If `False`, sort by all keys in descending order. + If a sequence, it must be the same length as `keys`, + and determines the direction with which to use each + key to sort by. + nulls_position : ``{'first', 'last'}`` + Whether null values should be placed at the beginning + or at the end of the result. + Note that the position of NaNs is unspecified and may + vary based on the implementation. + + Returns + ------- + Column[IntDType] + + Raises + ------ + ValueError + If `keys` and `ascending` are sequences of different lengths. + """ + ... + + def __eq__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # type: ignore[override] + """ + Compare for equality. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __ne__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # type: ignore[override] + """ + Compare for non-equality. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __ge__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + """ + Compare for "greater than or equal to" `other`. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __gt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + """ + Compare for "greater than" `other`. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __le__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + """ + Compare for "less than or equal to" `other`. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... 
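
As a usage sketch of how the selection and comparison methods above are meant to compose (``df`` stands for any Standard-compliant DataFrame and the column name ``'price'`` is purely illustrative, not part of the spec)::

    # build a boolean mask from a single column, then filter rows with it
    mask = df.get_column_by_name("price") > 0
    positive_prices = df.get_rows_by_mask(mask)

Note that ``get_rows_by_mask`` expects a boolean ``Column``, so the mask is built from a column-level comparison; how null entries in the mask are treated is not pinned down by this excerpt.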
+ + def __lt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + """ + Compare for "less than" `other`. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __and__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: + """ + Apply logical 'and' to `other` DataFrame (or scalar) and this dataframe. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : DataFrame[bool] or bool + If DataFrame, must have same length. + + Returns + ------- + DataFrame[bool] + + Raises + ------ + ValueError + If `self` or `other` is not boolean. + """ + + def __or__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: + """ + Apply logical 'or' to `other` DataFrame (or scalar) and this DataFrame. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : DataFrame[bool] or bool + If DataFrame, must have same length. + + Returns + ------- + DataFrame[bool] + + Raises + ------ + ValueError + If `self` or `other` is not boolean. + """ + + def __add__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Add `other` dataframe or scalar to this dataframe. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __sub__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Subtract `other` dataframe or scalar from this dataframe. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __mul__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Multiply `other` dataframe or scalar with this dataframe. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __truediv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Divide this dataframe by `other` dataframe or scalar. True division, returns floats. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __floordiv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __pow__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Raise this dataframe to the power of `other`. 
+ + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __mod__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Return modulus of this dataframe by `other` (`%` operator). + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __divmod__(self, other: DataFrame[Any] | Scalar[Any]) -> tuple[DataFrame[Any], DataFrame[Any]]: + """ + Return quotient and remainder of integer division. See `divmod` builtin function. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + A tuple of two DataFrame's + """ + ... + + def __invert__(self: DataFrame[Bool]) -> DataFrame[Bool]: + """ + Invert truthiness of (boolean) elements. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def __iter__(self) -> NoReturn: + """ + Iterate over elements. + + This is intentionally "poisoned" to discourage inefficient code patterns. + + Raises + ------ + NotImplementedError + """ + raise NotImplementedError("'__iter__' is intentionally not implemented.") + + def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + """ + Reduction returns a 1-row DataFrame. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + """ + Reduction returns a 1-row DataFrame. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + """ + Reduction returns a Column. + + Differs from ``DataFrame.any`` and that the reduction happens + for each row, rather than for each column. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + """ + Reduction returns a Column. + + Differs from ``DataFrame.all`` and that the reduction happens + for each row, rather than for each column. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def min(self, *, skip_nulls: bool = True) -> DataFrame[T]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def max(self, *, skip_nulls: bool = True) -> DataFrame[T]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... 
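
The row-wise reductions above combine naturally with the comparison operators and ``get_rows_by_mask``. A minimal sketch, assuming every column supports the comparison (all operations are out-of-place, so the results are bound to new names)::

    # keep only the rows in which every column is non-negative
    non_negative = (df >= 0).all_rowwise()   # Column[Bool], one entry per row
    df_filtered = df.get_rows_by_mask(non_negative)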
+ + def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def is_null(self) -> DataFrame[Bool]: + """ + Check for 'missing' or 'null' entries. + + Returns + ------- + DataFrame + + See also + -------- + is_nan + + Notes + ----- + Does *not* include NaN-like entries. + May optionally include 'NaT' values (if present in an implementation), + but note that the Standard makes no guarantees about them. + """ + ... + + def is_nan(self) -> DataFrame[Bool]: + """ + Check for nan entries. + + Returns + ------- + DataFrame + + See also + -------- + is_null + + Notes + ----- + This only checks for 'NaN'. + Does *not* include 'missing' or 'null' entries. + In particular, does not check for `np.timedelta64('NaT')`. + """ + ... + + def fill_nan(self, value: float | 'null', /) -> DataFrame[T]: + """ + Fill ``nan`` values with the given fill value. + + The fill operation will apply to all columns with a floating-point + dtype. Other columns remain unchanged. + + Parameters + ---------- + value : float or `null` + Value used to replace any ``nan`` in the column with. Must be + of the Python scalar type matching the dtype of the column (or + be `null`). + + """ + ... diff --git a/spec/API_specification/dataframe_api/groupby_object.pyi b/spec/API_specification/dataframe_api/groupby_object.pyi new file mode 100644 index 00000000..6ca0a600 --- /dev/null +++ b/spec/API_specification/dataframe_api/groupby_object.pyi @@ -0,0 +1,54 @@ +from typing import TYPE_CHECKING, TypeVar, Generic, Any + +if TYPE_CHECKING: + from .dataframe_object import DataFrame + from . import IntDType, DType, Bool + + +__all__ = ['GroupBy'] + +T = TypeVar('T', bound=DType) + + +class GroupBy: + """ + GroupBy object. + + Note that this class is not meant to be constructed by users. + It is returned from `DataFrame.groupby`. + + **Methods** + + """ + def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + ... + + def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + ... + + def min(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def max(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def size(self) -> DataFrame[IntDType]: + ... 
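
Because ``DataFrame.groupby`` documents that row order in downstream aggregations is implementation-defined, a deterministic result is obtained by sorting afterwards. A hedged sketch of the intended flow (the column names ``'store'`` and ``'sales'`` are invented for illustration, and whether the group keys appear as ordinary columns in the aggregated result is not spelled out in this excerpt)::

    totals = df.get_columns_by_name(["store", "sales"]).groupby(["store"]).sum()
    # row order after an aggregation is implementation-defined; sort explicitly
    totals = totals.get_rows(totals.sorted_indices(["store"]))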
From 436b819ffcd5e7b4539ef10a8ab25f61ae15d5aa Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 09:08:47 +0100 Subject: [PATCH 03/38] add CI --- .github/workflows/mypy.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/mypy.yml diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml new file mode 100644 index 00000000..bc7d478e --- /dev/null +++ b/.github/workflows/mypy.yml @@ -0,0 +1,32 @@ +name: mypy + +on: + pull_request: + push: + branches: [main] + +jobs: + tox: + strategy: + matrix: + python-version: ["3.8", "3.11"] + os: [ubuntu-latest] + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Cache multiple paths + uses: actions/cache@v3 + with: + path: | + ~/.cache/pip + $RUNNER_TOOL_CACHE/Python/* + ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-build-${{ matrix.python-version }} + - name: install-reqs + run: python -m pip install --upgrade mypy==1.4.0 + - name: run mypy + run: cd spec/API_specifications && mypy dataframe_api --strict From e8f5ca723038102a6611cab56ad43a283884da0d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 09:16:24 +0100 Subject: [PATCH 04/38] typo --- .github/workflows/mypy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index bc7d478e..1a60bec6 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -29,4 +29,4 @@ jobs: - name: install-reqs run: python -m pip install --upgrade mypy==1.4.0 - name: run mypy - run: cd spec/API_specifications && mypy dataframe_api --strict + run: cd spec/API_specification && mypy dataframe_api --strict From 8ce48df389f4a8a605202191072614583b3c2066 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 09:21:17 +0100 Subject: [PATCH 05/38] wip --- spec/API_specification/dataframe_api/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 spec/API_specification/dataframe_api/__init__.py diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py new file mode 100644 index 00000000..1d2bc426 --- /dev/null +++ b/spec/API_specification/dataframe_api/__init__.py @@ -0,0 +1 @@ +from . 
import DataFrame, Column From 38e027677b883b7eb42341b924f03c80e439b27f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 10:20:15 +0100 Subject: [PATCH 06/38] keep to .py, but disable empty-body code --- .github/workflows/mypy.yml | 2 +- .../dataframe_api/__init__.py | 1 - .../dataframe_api/__init__.pyi | 195 ----- .../dataframe_api/{_types.pyi => _types.py} | 0 .../dataframe_api/column_object.pyi | 586 -------------- .../dataframe_api/dataframe_object.pyi | 756 ------------------ .../dataframe_api/groupby_object.pyi | 54 -- 7 files changed, 1 insertion(+), 1593 deletions(-) delete mode 100644 spec/API_specification/dataframe_api/__init__.py delete mode 100644 spec/API_specification/dataframe_api/__init__.pyi rename spec/API_specification/dataframe_api/{_types.pyi => _types.py} (100%) delete mode 100644 spec/API_specification/dataframe_api/column_object.pyi delete mode 100644 spec/API_specification/dataframe_api/dataframe_object.pyi delete mode 100644 spec/API_specification/dataframe_api/groupby_object.pyi diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 1a60bec6..4c7f436b 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -29,4 +29,4 @@ jobs: - name: install-reqs run: python -m pip install --upgrade mypy==1.4.0 - name: run mypy - run: cd spec/API_specification && mypy dataframe_api --strict + run: cd spec/API_specification && mypy dataframe_api --strict --disable-error-code=empty-body diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py deleted file mode 100644 index 1d2bc426..00000000 --- a/spec/API_specification/dataframe_api/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from . import DataFrame, Column diff --git a/spec/API_specification/dataframe_api/__init__.pyi b/spec/API_specification/dataframe_api/__init__.pyi deleted file mode 100644 index ed0d089c..00000000 --- a/spec/API_specification/dataframe_api/__init__.pyi +++ /dev/null @@ -1,195 +0,0 @@ -""" -Function stubs and API documentation for the DataFrame API standard. -""" -from __future__ import annotations - -from typing import Mapping, Sequence, Any, Generic, TypeVar - -from .column_object import * -from .dataframe_object import DataFrame -from .groupby_object import * - -T = TypeVar("T", bound=DType) - -__all__ = [ - "__dataframe_api_version", - "column_from_sequence", - "concat", - "dataframe_from_dict", - "is_null", - "null", - "DType", - "Int64", - "Int32", - "Int16", - "Int8", - "UInt64", - "UInt32", - "UInt16", - "UInt8", - "Float64", - "Float32", - "Bool", -] - - -__dataframe_api_version__: str = "YYYY.MM" -""" -String representing the version of the DataFrame API specification to which -the conforming implementation adheres. Set to a concrete value for a stable -implementation of the dataframe API standard. -""" - -def concat(dataframes: Sequence[DataFrame[Any]]) -> DataFrame[Any]: - """ - Concatenate DataFrames vertically. - - To concatenate horizontally, please use ``insert``. - - Parameters - ---------- - dataframes : Sequence[DataFrame] - DataFrames to concatenate. - Column names, ordering, and dtypes must match. - - Notes - ----- - The order in which the input DataFrames appear in - the output is preserved (so long as the DataFrame implementation supports row - ordering). - """ - ... - -def column_from_sequence(sequence: Sequence[Scalar[DType]], *, dtype: DType) -> Column[DType]: - """ - Construct Column from sequence of elements. 
- - Parameters - ---------- - sequence : Sequence[object] - Sequence of elements. Each element must be of the specified - ``dtype``, the corresponding Python builtin scalar type, or - coercible to that Python scalar type. - dtype : DType - Dtype of result. Must be specified. - - Returns - ------- - Column - """ - ... - -def dataframe_from_dict(data: Mapping[str, Column[Any]]) -> DataFrame[Any]: - """ - Construct DataFrame from map of column names to Columns. - - Parameters - ---------- - data : Mapping[str, Column] - Column must be of the corresponding type of the DataFrame. - For example, it is only supported to build a ``LibraryXDataFrame`` using - ``LibraryXColumn`` instances. - - Returns - ------- - DataFrame - """ - ... - -class null: - """ - A `null` object to represent missing data. - - ``null`` is a scalar, and may be used when constructing a `Column` from a - Python sequence with `column_from_sequence`. It does not support ``is``, - ``==`` or ``bool``. - - Raises - ------ - TypeError - From ``__eq__`` and from ``__bool__``. - - For ``__eq__``: a missing value must not be compared for equality - directly. Instead, use `DataFrame.is_null` or `Column.is_null` to check - for presence of missing values. - - For ``__bool__``: truthiness of a missing value is ambiguous. - - Notes - ----- - Like for Python scalars, the ``null`` object may be duck typed so it can - reside on (e.g.) a GPU. Hence, the builtin ``is`` keyword should not be - used to check if an object *is* the ``null`` object. - - """ - ... - -def is_null(value: object, /) -> bool: - """ - Check if an object is a `null` scalar. - - Parameters - ---------- - value : object - Any input type is valid. - - Returns - ------- - bool - True if the input is a `null` object from the same library which - implements the dataframe API standard, False otherwise. - - """ - -########## -# Dtypes # -########## - -class DType: - """Base class for all dtypes.""" - -class IntDType(DType): - """Base class for all integer dtypes.""" - -class FloatDType(DType): - """Base class for all float dtypes.""" - -class Int64(IntDType): - """Integer type with 64 bits of precision.""" - -class Int32(IntDType): - """Integer type with 32 bits of precision.""" - -class Int16(IntDType): - """Integer type with 16 bits of precision.""" - -class Int8(IntDType): - """Integer type with 8 bits of precision.""" - -class UInt64(IntDType): - """Unsigned integer type with 64 bits of precision.""" - -class UInt32(IntDType): - """Unsigned integer type with 32 bits of precision.""" - -class UInt16(IntDType): - """Unsigned integer type with 16 bits of precision.""" - -class UInt8(IntDType): - """Unsigned integer type with 8 bits of precision.""" - -class Float64(FloatDType): - """Floating point type with 64 bits of precision.""" - -class Float32(FloatDType): - """Floating point type with 32 bits of precision.""" - -class Bool(DType): - """Boolean type with 8 bits of precision.""" - -########## -# Scalar # -########## - -class Scalar(Generic[T]): - ... 
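
The constructor functions and dtype hierarchy in this ``__init__`` module (relocated, otherwise unchanged, in the following commits) are meant to be reached through the namespace rather than a hard-coded import. A sketch under stated assumptions: the namespace is obtained from an existing object, and passing a dtype instance such as ``Int64()`` is an assumption of the example, since the excerpt only says "Dtype of result"::

    ns = df.__dataframe_namespace__()          # implementing library's namespace
    col = ns.column_from_sequence([1, 2, ns.null], dtype=ns.Int64())
    new_df = ns.dataframe_from_dict({"a": col})
    ns.is_null(col.get_value(2))               # True: the third element is null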
diff --git a/spec/API_specification/dataframe_api/_types.pyi b/spec/API_specification/dataframe_api/_types.py similarity index 100% rename from spec/API_specification/dataframe_api/_types.pyi rename to spec/API_specification/dataframe_api/_types.py diff --git a/spec/API_specification/dataframe_api/column_object.pyi b/spec/API_specification/dataframe_api/column_object.pyi deleted file mode 100644 index a06c993a..00000000 --- a/spec/API_specification/dataframe_api/column_object.pyi +++ /dev/null @@ -1,586 +0,0 @@ -from __future__ import annotations - -from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal, Generic, TypeVar - -if TYPE_CHECKING: - from . import DType, IntDType, FloatDType, Bool, null, Scalar - -T = TypeVar('T', bound=DType) - -__all__ = ['Column'] - - -class Column(Generic[T]): - """ - Column object - - Note that this column object is not meant to be instantiated directly by - users of the library implementing the dataframe API standard. Rather, use - constructor functions or an already-created dataframe object retrieved via - - """ - - def __column_namespace__( - self, /, *, api_version: str | None = None - ) -> Any: - """ - Returns an object that has all the Dataframe Standard API functions on it. - - Parameters - ---------- - api_version: Optional[str] - String representing the version of the dataframe API specification - to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. - If it is ``None``, it should return the namespace corresponding to - latest version of the dataframe API specification. If the given - version is invalid or not implemented for the given module, an - error should be raised. Default: ``None``. - - Returns - ------- - namespace: Any - An object representing the dataframe API namespace. It should have - every top-level function defined in the specification as an - attribute. It may contain other public names as well, but it is - recommended to only include those names that are part of the - specification. - - """ - - @property - def column(self) -> object: - """ - Return underlying (not-necessarily-Standard-compliant) column. - - If a library only implements the Standard, then this can return `self`. - """ - ... - - def __len__(self) -> int: - """ - Return the number of rows. - """ - - def __iter__(self) -> NoReturn: - """ - Iterate over elements. - - This is intentionally "poisoned" to discourage inefficient code patterns. - - Raises - ------ - NotImplementedError - """ - raise NotImplementedError("'__iter__' is intentionally not implemented.") - - @property - def dtype(self) -> DType: - """ - Return data type of column. - """ - - def get_rows(self, indices: Column[IntDType]) -> Column[T]: - """ - Select a subset of rows, similar to `ndarray.take`. - - Parameters - ---------- - indices : Column[IntDType] - Positions of rows to select. - """ - ... - - def get_value(self, row_number: int) -> Scalar[T]: - """ - Select the value at a row number, similar to `ndarray.__getitem__()`. - - Parameters - ---------- - row_number : int - Row number of value to return. - - Returns - ------- - Scalar - Depends on the dtype of the Column, and may vary - across implementations. - """ - ... - - def sorted_indices( - self, - *, - ascending: bool = True, - nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[IntDType]: - """ - Return row numbers which would sort column. 
- - If you need to sort the Column, you can simply do:: - - col.get_rows(col.sorted_indices()) - - Parameters - ---------- - ascending : bool - If `True`, sort in ascending order. - If `False`, sort in descending order. - nulls_position : ``{'first', 'last'}`` - Whether null values should be placed at the beginning - or at the end of the result. - Note that the position of NaNs is unspecified and may - vary based on the implementation. - - Returns - ------- - Column[IntDType] - """ - ... - - def __eq__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: ignore[override] - """ - Compare for equality. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __ne__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: ignore[override] - """ - Compare for non-equality. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __ge__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Compare for "greater than or equal to" `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __gt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Compare for "greater than" `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __le__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Compare for "less than or equal to" `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __lt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Compare for "less than" `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __and__(self, other: Column[Bool] | bool) -> Column[Bool]: - """ - Apply logical 'and' to `other` Column (or scalar) and this Column. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : Column[bool] or bool - If Column, must have same length. - - Returns - ------- - Column - - Raises - ------ - ValueError - If `self` or `other` is not boolean. - """ - - def __or__(self, other: Column[Bool] | bool) -> Column[Bool]: - """ - Apply logical 'or' to `other` Column (or scalar) and this column. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : Column[bool] or Scalar - If Column, must have same length. - - Returns - ------- - Column[bool] - - Raises - ------ - ValueError - If `self` or `other` is not boolean. 
- """ - - def __add__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Add `other` column or scalar to this column. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __sub__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Subtract `other` column or scalar from this column. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __mul__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Multiply `other` column or scalar with this column. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Divide this column by `other` column or scalar. True division, returns floats. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Floor-divide `other` column or scalar to this column. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Raise this column to the power of `other`. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __mod__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: - """ - Returns modulus of this column by `other` (`%` operator). - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __divmod__(self, other: Column[Any] | Scalar[Any]) -> tuple[Column[IntDType], Column[IntDType]]: - """ - Return quotient and remainder of integer division. See `divmod` builtin function. - - Parameters - ---------- - other : Column or Scalar - If Column, must have same length. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - Column - """ - - def __invert__(self) -> Column[Bool]: - """ - Invert truthiness of (boolean) elements. - - Raises - ------ - ValueError - If any of the Column's columns is not boolean. - """ - - def any(self, *, skip_nulls: bool = True) -> bool: - """ - Reduction returns a bool. - - Raises - ------ - ValueError - If column is not boolean. - """ - - def all(self, *, skip_nulls: bool = True) -> bool: - """ - Reduction returns a bool. 
- - Raises - ------ - ValueError - If column is not boolean. - """ - - def min(self, *, skip_nulls: bool = True) -> Scalar[T]: - """ - Reduction returns a scalar. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. - """ - - def max(self, *, skip_nulls: bool = True) -> Scalar[T]: - """ - Reduction returns a scalar. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. - """ - - def sum(self, *, skip_nulls: bool = True) -> Scalar[T]: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. The returned value has the same dtype as the - column. - """ - - def prod(self, *, skip_nulls: bool = True) -> Scalar[T]: - """ - Reduction returns a scalar. Must be supported for numerical data types. - The returned value has the same dtype as the column. - """ - - def median(self, *, skip_nulls: bool = True) -> Scalar[Any]: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. Returns a float for numerical data types, and - datetime (with the appropriate timedelta format string) for datetime - dtypes. - """ - - def mean(self, *, skip_nulls: bool = True) -> Scalar[Any]: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. Returns a float for numerical data types, and - datetime (with the appropriate timedelta format string) for datetime - dtypes. - """ - - def std(self, *, skip_nulls: bool = True) -> Scalar[Any]: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. Returns a float for numerical data types, and - datetime (with the appropriate timedelta format string) for datetime - dtypes. - """ - - def var(self, *, skip_nulls: bool = True) -> Scalar[Any]: - """ - Reduction returns a scalar. Must be supported for numerical and - datetime data types. Returns a float for numerical data types, and - datetime (with the appropriate timedelta format string) for datetime - dtypes. - """ - - def is_null(self) -> Column[Bool]: - """ - Check for 'missing' or 'null' entries. - - Returns - ------- - Column - - See also - -------- - is_nan - - Notes - ----- - Does *not* include NaN-like entries. - May optionally include 'NaT' values (if present in an implementation), - but note that the Standard makes no guarantees about them. - """ - - def is_nan(self) -> Column[Bool]: - """ - Check for nan entries. - - Returns - ------- - Column - - See also - -------- - is_null - - Notes - ----- - This only checks for 'NaN'. - Does *not* include 'missing' or 'null' entries. - In particular, does not check for `np.timedelta64('NaT')`. - """ - - def is_in(self, values: Column[T]) -> Column[Bool]: - """ - Indicate whether the value at each row matches any value in `values`. - - Parameters - ---------- - values : Column - Contains values to compare against. May include ``float('nan')`` and - ``null``, in which case ``'nan'`` and ``null`` will - respectively return ``True`` even though ``float('nan') == float('nan')`` - isn't ``True``. - The dtype of ``values`` must match the current column's dtype. - - Returns - ------- - Column[bool] - """ - - def unique_indices(self, *, skip_nulls: bool = True) -> Column[IntDType]: - """ - Return indices corresponding to unique values in Column. - - Returns - ------- - Column[IntDType] - Indices corresponding to unique values. - - Notes - ----- - There are no ordering guarantees. 
In particular, if there are multiple - indices corresponding to the same unique value, there is no guarantee - about which one will appear in the result. - If the original Column contains multiple `'NaN'` values, then - only a single index corresponding to those values should be returned. - Likewise for null values (if ``skip_nulls=False``). - To get the unique values, you can do ``col.get_rows(col.unique_indices())``. - """ - ... - - def fill_nan(self, value: float | 'null', /) -> Column[T]: - """ - Fill floating point ``nan`` values with the given fill value. - - Parameters - ---------- - value : float or `null` - Value used to replace any ``nan`` in the column with. Must be - of the Python scalar type matching the dtype of the column (or - be `null`). - - """ - ... diff --git a/spec/API_specification/dataframe_api/dataframe_object.pyi b/spec/API_specification/dataframe_api/dataframe_object.pyi deleted file mode 100644 index 14a1f29c..00000000 --- a/spec/API_specification/dataframe_api/dataframe_object.pyi +++ /dev/null @@ -1,756 +0,0 @@ -from __future__ import annotations - -from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn, TypeVar, Generic - - -if TYPE_CHECKING: - from .column_object import Column - from .groupby_object import GroupBy - from . import DType, IntDType, FloatDType, Bool, null, Scalar - - -__all__ = ["DataFrame"] - -T = TypeVar("T", bound=DType) - - -class DataFrame(Generic[T]): - """ - DataFrame object - - Note that this dataframe object is not meant to be instantiated directly by - users of the library implementing the dataframe API standard. Rather, use - constructor functions or an already-created dataframe object retrieved via - - **Python operator support** - - All arithmetic operators defined by the Python language, except for - ``__matmul__``, ``__neg__`` and ``__pos__``, must be supported for - numerical data types. - - All comparison operators defined by the Python language must be supported - by the dataframe object for all data types for which those comparisons are - supported by the builtin scalar types corresponding to a data type. - - In-place operators must not be supported. All operations on the dataframe - object are out-of-place. - - **Methods and Attributes** - - """ - def __dataframe_namespace__( - self, /, *, api_version: str | None = None - ) -> Any: - """ - Returns an object that has all the dataframe API functions on it. - - Parameters - ---------- - api_version: Optional[str] - String representing the version of the dataframe API specification - to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. - If it is ``None``, it should return the namespace corresponding to - latest version of the dataframe API specification. If the given - version is invalid or not implemented for the given module, an - error should be raised. Default: ``None``. - - Returns - ------- - namespace: Any - An object representing the dataframe API namespace. It should have - every top-level function defined in the specification as an - attribute. It may contain other public names as well, but it is - recommended to only include those names that are part of the - specification. - - """ - - @property - def dataframe(self) -> object: - """ - Return underlying (not-necessarily-Standard-compliant) DataFrame. - - If a library only implements the Standard, then this can return `self`. - """ - ... - - def shape(self) -> tuple[int, int]: - """ - Return number of rows and number of columns. 
- """ - - def groupby(self, keys: Sequence[str], /) -> GroupBy: - """ - Group the DataFrame by the given columns. - - Parameters - ---------- - keys : Sequence[str] - - Returns - ------- - GroupBy - - Raises - ------ - KeyError - If any of the requested keys are not present. - - Notes - ----- - Downstream operations from this function, like aggregations, return - results for which row order is not guaranteed and is implementation - defined. - """ - ... - - def get_column_by_name(self, name: str, /) -> Column[T]: - """ - Select a column by name. - - Parameters - ---------- - name : str - - Returns - ------- - Column - - Raises - ------ - KeyError - If the key is not present. - """ - ... - - def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame[T]: - """ - Select multiple columns by name. - - Parameters - ---------- - names : Sequence[str] - - Returns - ------- - DataFrame - - Raises - ------ - KeyError - If the any requested key is not present. - """ - ... - - def get_rows(self, indices: Column[IntDType]) -> DataFrame[T]: - """ - Select a subset of rows, similar to `ndarray.take`. - - Parameters - ---------- - indices : Column[IntDType] - Positions of rows to select. - - Returns - ------- - DataFrame - """ - ... - - def slice_rows( - self, start: int | None, stop: int | None, step: int | None - ) -> DataFrame[T]: - """ - Select a subset of rows corresponding to a slice. - - Parameters - ---------- - start : int or None - stop : int or None - step : int or None - - Returns - ------- - DataFrame - """ - ... - - def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame[T]: - """ - Select a subset of rows corresponding to a mask. - - Parameters - ---------- - mask : Column[bool] - - Returns - ------- - DataFrame - - Notes - ----- - Some participants preferred a weaker type Arraylike[bool] for mask, - where 'Arraylike' denotes an object adhering to the Array API standard. - """ - ... - - def insert(self, loc: int, label: str, value: Column[Any]) -> DataFrame[Any]: - """ - Insert column into DataFrame at specified location. - - Parameters - ---------- - loc : int - Insertion index. Must verify 0 <= loc <= len(columns). - label : str - Label of the inserted column. - value : Column - """ - ... - - def drop_column(self, label: str) -> DataFrame[T]: - """ - Drop the specified column. - - Parameters - ---------- - label : str - - Returns - ------- - DataFrame - - Raises - ------ - KeyError - If the label is not present. - """ - ... - - def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame[T]: - """ - Rename columns. - - Parameters - ---------- - mapping : Mapping[str, str] - Keys are old column names, values are new column names. - - Returns - ------- - DataFrame - """ - ... - - def get_column_names(self) -> Sequence[str]: - """ - Get column names. - - Returns - ------- - Sequence[str] - """ - ... - - def sorted_indices( - self, - keys: Sequence[str], - *, - ascending: Sequence[bool] | bool = True, - nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[IntDType]: - """ - Return row numbers which would sort according to given columns. - - If you need to sort the DataFrame, you can simply do:: - - df.get_rows(df.sorted_indices(keys)) - - Parameters - ---------- - keys : Sequence[str] - Names of columns to sort by. - ascending : Sequence[bool] or bool - If `True`, sort by all keys in ascending order. - If `False`, sort by all keys in descending order. 
- If a sequence, it must be the same length as `keys`, - and determines the direction with which to use each - key to sort by. - nulls_position : ``{'first', 'last'}`` - Whether null values should be placed at the beginning - or at the end of the result. - Note that the position of NaNs is unspecified and may - vary based on the implementation. - - Returns - ------- - Column[IntDType] - - Raises - ------ - ValueError - If `keys` and `ascending` are sequences of different lengths. - """ - ... - - def __eq__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # type: ignore[override] - """ - Compare for equality. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __ne__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # type: ignore[override] - """ - Compare for non-equality. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __ge__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: - """ - Compare for "greater than or equal to" `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __gt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: - """ - Compare for "greater than" `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __le__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: - """ - Compare for "less than or equal to" `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __lt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: - """ - Compare for "less than" `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __and__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: - """ - Apply logical 'and' to `other` DataFrame (or scalar) and this dataframe. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : DataFrame[bool] or bool - If DataFrame, must have same length. - - Returns - ------- - DataFrame[bool] - - Raises - ------ - ValueError - If `self` or `other` is not boolean. 
- """ - - def __or__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: - """ - Apply logical 'or' to `other` DataFrame (or scalar) and this DataFrame. - - Nulls should follow Kleene Logic. - - Parameters - ---------- - other : DataFrame[bool] or bool - If DataFrame, must have same length. - - Returns - ------- - DataFrame[bool] - - Raises - ------ - ValueError - If `self` or `other` is not boolean. - """ - - def __add__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: - """ - Add `other` dataframe or scalar to this dataframe. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __sub__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: - """ - Subtract `other` dataframe or scalar from this dataframe. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __mul__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: - """ - Multiply `other` dataframe or scalar with this dataframe. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __truediv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: - """ - Divide this dataframe by `other` dataframe or scalar. True division, returns floats. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __floordiv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: - """ - Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __pow__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: - """ - Raise this dataframe to the power of `other`. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... - - def __mod__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: - """ - Return modulus of this dataframe by `other` (`%` operator). - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - DataFrame - """ - ... 
- - def __divmod__(self, other: DataFrame[Any] | Scalar[Any]) -> tuple[DataFrame[Any], DataFrame[Any]]: - """ - Return quotient and remainder of integer division. See `divmod` builtin function. - - Parameters - ---------- - other : DataFrame or Scalar - If DataFrame, must have same length and matching columns. - "Scalar" here is defined implicitly by what scalar types are allowed - for the operation by the underling dtypes. - - Returns - ------- - A tuple of two DataFrame's - """ - ... - - def __invert__(self: DataFrame[Bool]) -> DataFrame[Bool]: - """ - Invert truthiness of (boolean) elements. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def __iter__(self) -> NoReturn: - """ - Iterate over elements. - - This is intentionally "poisoned" to discourage inefficient code patterns. - - Raises - ------ - NotImplementedError - """ - raise NotImplementedError("'__iter__' is intentionally not implemented.") - - def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: - """ - Reduction returns a 1-row DataFrame. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: - """ - Reduction returns a 1-row DataFrame. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: - """ - Reduction returns a Column. - - Differs from ``DataFrame.any`` and that the reduction happens - for each row, rather than for each column. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: - """ - Reduction returns a Column. - - Differs from ``DataFrame.all`` and that the reduction happens - for each row, rather than for each column. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def min(self, *, skip_nulls: bool = True) -> DataFrame[T]: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def max(self, *, skip_nulls: bool = True) -> DataFrame[T]: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - """ - Reduction returns a 1-row DataFrame. - """ - ... - - def is_null(self) -> DataFrame[Bool]: - """ - Check for 'missing' or 'null' entries. - - Returns - ------- - DataFrame - - See also - -------- - is_nan - - Notes - ----- - Does *not* include NaN-like entries. - May optionally include 'NaT' values (if present in an implementation), - but note that the Standard makes no guarantees about them. - """ - ... - - def is_nan(self) -> DataFrame[Bool]: - """ - Check for nan entries. - - Returns - ------- - DataFrame - - See also - -------- - is_null - - Notes - ----- - This only checks for 'NaN'. 
- Does *not* include 'missing' or 'null' entries. - In particular, does not check for `np.timedelta64('NaT')`. - """ - ... - - def fill_nan(self, value: float | 'null', /) -> DataFrame[T]: - """ - Fill ``nan`` values with the given fill value. - - The fill operation will apply to all columns with a floating-point - dtype. Other columns remain unchanged. - - Parameters - ---------- - value : float or `null` - Value used to replace any ``nan`` in the column with. Must be - of the Python scalar type matching the dtype of the column (or - be `null`). - - """ - ... diff --git a/spec/API_specification/dataframe_api/groupby_object.pyi b/spec/API_specification/dataframe_api/groupby_object.pyi deleted file mode 100644 index 6ca0a600..00000000 --- a/spec/API_specification/dataframe_api/groupby_object.pyi +++ /dev/null @@ -1,54 +0,0 @@ -from typing import TYPE_CHECKING, TypeVar, Generic, Any - -if TYPE_CHECKING: - from .dataframe_object import DataFrame - from . import IntDType, DType, Bool - - -__all__ = ['GroupBy'] - -T = TypeVar('T', bound=DType) - - -class GroupBy: - """ - GroupBy object. - - Note that this class is not meant to be constructed by users. - It is returned from `DataFrame.groupby`. - - **Methods** - - """ - def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: - ... - - def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: - ... - - def min(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - ... - - def max(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - ... - - def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - ... - - def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - ... - - def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - ... - - def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - ... - - def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - ... - - def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: - ... - - def size(self) -> DataFrame[IntDType]: - ... From 8f460098b67ff76d38f5eda10e705e81f2cfb96d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 10:21:00 +0100 Subject: [PATCH 07/38] add missing files --- spec/API_specification/README.md | 7 + .../dataframe_api/__init__.py | 195 +++++ .../dataframe_api/column_object.py | 586 ++++++++++++++ .../dataframe_api/dataframe_object.py | 756 ++++++++++++++++++ .../dataframe_api/groupby_object.py | 54 ++ 5 files changed, 1598 insertions(+) create mode 100644 spec/API_specification/README.md create mode 100644 spec/API_specification/dataframe_api/__init__.py create mode 100644 spec/API_specification/dataframe_api/column_object.py create mode 100644 spec/API_specification/dataframe_api/dataframe_object.py create mode 100644 spec/API_specification/dataframe_api/groupby_object.py diff --git a/spec/API_specification/README.md b/spec/API_specification/README.md new file mode 100644 index 00000000..0f2a16db --- /dev/null +++ b/spec/API_specification/README.md @@ -0,0 +1,7 @@ +# API Specification + +To type-check the spec, please install `mypy==1.4.0` and run + +```console +mypy dataframe_api --strict +``` \ No newline at end of file diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py new file mode 100644 index 00000000..1aa4e945 --- /dev/null +++ b/spec/API_specification/dataframe_api/__init__.py @@ -0,0 +1,195 @@ +""" +Function stubs and API documentation for the DataFrame API standard. 
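+
+A minimal, illustrative sketch (not normative) of how the constructors
+compose; the column name ``"a"`` and the ``Int64()`` spelling are
+assumptions made only for this example::
+
+    col = column_from_sequence([1, 2, null], dtype=Int64())
+    df = dataframe_from_dict({"a": col})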
+""" +from __future__ import annotations + +from typing import Mapping, Sequence, Any, Generic, TypeVar + +from .column_object import * +from .dataframe_object import DataFrame +from .groupby_object import * + +T = TypeVar("T", bound="DType") + +__all__ = [ + "__dataframe_api_version", + "column_from_sequence", + "concat", + "dataframe_from_dict", + "is_null", + "null", + "DType", + "Int64", + "Int32", + "Int16", + "Int8", + "UInt64", + "UInt32", + "UInt16", + "UInt8", + "Float64", + "Float32", + "Bool", +] + + +__dataframe_api_version__: str = "YYYY.MM" +""" +String representing the version of the DataFrame API specification to which +the conforming implementation adheres. Set to a concrete value for a stable +implementation of the dataframe API standard. +""" + +def concat(dataframes: Sequence[DataFrame[Any]]) -> DataFrame[Any]: + """ + Concatenate DataFrames vertically. + + To concatenate horizontally, please use ``insert``. + + Parameters + ---------- + dataframes : Sequence[DataFrame] + DataFrames to concatenate. + Column names, ordering, and dtypes must match. + + Notes + ----- + The order in which the input DataFrames appear in + the output is preserved (so long as the DataFrame implementation supports row + ordering). + """ + ... + +def column_from_sequence(sequence: Sequence[Scalar[DType]], *, dtype: DType) -> Column[DType]: + """ + Construct Column from sequence of elements. + + Parameters + ---------- + sequence : Sequence[object] + Sequence of elements. Each element must be of the specified + ``dtype``, the corresponding Python builtin scalar type, or + coercible to that Python scalar type. + dtype : DType + Dtype of result. Must be specified. + + Returns + ------- + Column + """ + ... + +def dataframe_from_dict(data: Mapping[str, Column[Any]]) -> DataFrame[Any]: + """ + Construct DataFrame from map of column names to Columns. + + Parameters + ---------- + data : Mapping[str, Column] + Column must be of the corresponding type of the DataFrame. + For example, it is only supported to build a ``LibraryXDataFrame`` using + ``LibraryXColumn`` instances. + + Returns + ------- + DataFrame + """ + ... + +class null: + """ + A `null` object to represent missing data. + + ``null`` is a scalar, and may be used when constructing a `Column` from a + Python sequence with `column_from_sequence`. It does not support ``is``, + ``==`` or ``bool``. + + Raises + ------ + TypeError + From ``__eq__`` and from ``__bool__``. + + For ``__eq__``: a missing value must not be compared for equality + directly. Instead, use `DataFrame.is_null` or `Column.is_null` to check + for presence of missing values. + + For ``__bool__``: truthiness of a missing value is ambiguous. + + Notes + ----- + Like for Python scalars, the ``null`` object may be duck typed so it can + reside on (e.g.) a GPU. Hence, the builtin ``is`` keyword should not be + used to check if an object *is* the ``null`` object. + + """ + ... + +def is_null(value: object, /) -> bool: + """ + Check if an object is a `null` scalar. + + Parameters + ---------- + value : object + Any input type is valid. + + Returns + ------- + bool + True if the input is a `null` object from the same library which + implements the dataframe API standard, False otherwise. 
+ + """ + +########## +# Dtypes # +########## + +class DType: + """Base class for all dtypes.""" + +class IntDType(DType): + """Base class for all integer dtypes.""" + +class FloatDType(DType): + """Base class for all float dtypes.""" + +class Int64(IntDType): + """Integer type with 64 bits of precision.""" + +class Int32(IntDType): + """Integer type with 32 bits of precision.""" + +class Int16(IntDType): + """Integer type with 16 bits of precision.""" + +class Int8(IntDType): + """Integer type with 8 bits of precision.""" + +class UInt64(IntDType): + """Unsigned integer type with 64 bits of precision.""" + +class UInt32(IntDType): + """Unsigned integer type with 32 bits of precision.""" + +class UInt16(IntDType): + """Unsigned integer type with 16 bits of precision.""" + +class UInt8(IntDType): + """Unsigned integer type with 8 bits of precision.""" + +class Float64(FloatDType): + """Floating point type with 64 bits of precision.""" + +class Float32(FloatDType): + """Floating point type with 32 bits of precision.""" + +class Bool(DType): + """Boolean type with 8 bits of precision.""" + +########## +# Scalar # +########## + +class Scalar(Generic[T]): + ... diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py new file mode 100644 index 00000000..a06c993a --- /dev/null +++ b/spec/API_specification/dataframe_api/column_object.py @@ -0,0 +1,586 @@ +from __future__ import annotations + +from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal, Generic, TypeVar + +if TYPE_CHECKING: + from . import DType, IntDType, FloatDType, Bool, null, Scalar + +T = TypeVar('T', bound=DType) + +__all__ = ['Column'] + + +class Column(Generic[T]): + """ + Column object + + Note that this column object is not meant to be instantiated directly by + users of the library implementing the dataframe API standard. Rather, use + constructor functions or an already-created dataframe object retrieved via + + """ + + def __column_namespace__( + self, /, *, api_version: str | None = None + ) -> Any: + """ + Returns an object that has all the Dataframe Standard API functions on it. + + Parameters + ---------- + api_version: Optional[str] + String representing the version of the dataframe API specification + to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. + If it is ``None``, it should return the namespace corresponding to + latest version of the dataframe API specification. If the given + version is invalid or not implemented for the given module, an + error should be raised. Default: ``None``. + + Returns + ------- + namespace: Any + An object representing the dataframe API namespace. It should have + every top-level function defined in the specification as an + attribute. It may contain other public names as well, but it is + recommended to only include those names that are part of the + specification. + + """ + + @property + def column(self) -> object: + """ + Return underlying (not-necessarily-Standard-compliant) column. + + If a library only implements the Standard, then this can return `self`. + """ + ... + + def __len__(self) -> int: + """ + Return the number of rows. + """ + + def __iter__(self) -> NoReturn: + """ + Iterate over elements. + + This is intentionally "poisoned" to discourage inefficient code patterns. + + Raises + ------ + NotImplementedError + """ + raise NotImplementedError("'__iter__' is intentionally not implemented.") + + @property + def dtype(self) -> DType: + """ + Return data type of column. 
+ """ + + def get_rows(self, indices: Column[IntDType]) -> Column[T]: + """ + Select a subset of rows, similar to `ndarray.take`. + + Parameters + ---------- + indices : Column[IntDType] + Positions of rows to select. + """ + ... + + def get_value(self, row_number: int) -> Scalar[T]: + """ + Select the value at a row number, similar to `ndarray.__getitem__()`. + + Parameters + ---------- + row_number : int + Row number of value to return. + + Returns + ------- + Scalar + Depends on the dtype of the Column, and may vary + across implementations. + """ + ... + + def sorted_indices( + self, + *, + ascending: bool = True, + nulls_position: Literal['first', 'last'] = 'last', + ) -> Column[IntDType]: + """ + Return row numbers which would sort column. + + If you need to sort the Column, you can simply do:: + + col.get_rows(col.sorted_indices()) + + Parameters + ---------- + ascending : bool + If `True`, sort in ascending order. + If `False`, sort in descending order. + nulls_position : ``{'first', 'last'}`` + Whether null values should be placed at the beginning + or at the end of the result. + Note that the position of NaNs is unspecified and may + vary based on the implementation. + + Returns + ------- + Column[IntDType] + """ + ... + + def __eq__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: ignore[override] + """ + Compare for equality. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __ne__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: ignore[override] + """ + Compare for non-equality. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __ge__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Compare for "greater than or equal to" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __gt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Compare for "greater than" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __le__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Compare for "less than or equal to" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __lt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Compare for "less than" `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. 
+ + Returns + ------- + Column + """ + + def __and__(self, other: Column[Bool] | bool) -> Column[Bool]: + """ + Apply logical 'and' to `other` Column (or scalar) and this Column. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : Column[bool] or bool + If Column, must have same length. + + Returns + ------- + Column + + Raises + ------ + ValueError + If `self` or `other` is not boolean. + """ + + def __or__(self, other: Column[Bool] | bool) -> Column[Bool]: + """ + Apply logical 'or' to `other` Column (or scalar) and this column. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : Column[bool] or Scalar + If Column, must have same length. + + Returns + ------- + Column[bool] + + Raises + ------ + ValueError + If `self` or `other` is not boolean. + """ + + def __add__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Add `other` column or scalar to this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __sub__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Subtract `other` column or scalar from this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __mul__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Multiply `other` column or scalar with this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Divide this column by `other` column or scalar. True division, returns floats. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Floor-divide `other` column or scalar to this column. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Raise this column to the power of `other`. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __mod__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + """ + Returns modulus of this column by `other` (`%` operator). + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. 
+ + Returns + ------- + Column + """ + + def __divmod__(self, other: Column[Any] | Scalar[Any]) -> tuple[Column[IntDType], Column[IntDType]]: + """ + Return quotient and remainder of integer division. See `divmod` builtin function. + + Parameters + ---------- + other : Column or Scalar + If Column, must have same length. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + Column + """ + + def __invert__(self) -> Column[Bool]: + """ + Invert truthiness of (boolean) elements. + + Raises + ------ + ValueError + If any of the Column's columns is not boolean. + """ + + def any(self, *, skip_nulls: bool = True) -> bool: + """ + Reduction returns a bool. + + Raises + ------ + ValueError + If column is not boolean. + """ + + def all(self, *, skip_nulls: bool = True) -> bool: + """ + Reduction returns a bool. + + Raises + ------ + ValueError + If column is not boolean. + """ + + def min(self, *, skip_nulls: bool = True) -> Scalar[T]: + """ + Reduction returns a scalar. Any data type that supports comparisons + must be supported. The returned value has the same dtype as the column. + """ + + def max(self, *, skip_nulls: bool = True) -> Scalar[T]: + """ + Reduction returns a scalar. Any data type that supports comparisons + must be supported. The returned value has the same dtype as the column. + """ + + def sum(self, *, skip_nulls: bool = True) -> Scalar[T]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. The returned value has the same dtype as the + column. + """ + + def prod(self, *, skip_nulls: bool = True) -> Scalar[T]: + """ + Reduction returns a scalar. Must be supported for numerical data types. + The returned value has the same dtype as the column. + """ + + def median(self, *, skip_nulls: bool = True) -> Scalar[Any]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. + """ + + def mean(self, *, skip_nulls: bool = True) -> Scalar[Any]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. + """ + + def std(self, *, skip_nulls: bool = True) -> Scalar[Any]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. + """ + + def var(self, *, skip_nulls: bool = True) -> Scalar[Any]: + """ + Reduction returns a scalar. Must be supported for numerical and + datetime data types. Returns a float for numerical data types, and + datetime (with the appropriate timedelta format string) for datetime + dtypes. + """ + + def is_null(self) -> Column[Bool]: + """ + Check for 'missing' or 'null' entries. + + Returns + ------- + Column + + See also + -------- + is_nan + + Notes + ----- + Does *not* include NaN-like entries. + May optionally include 'NaT' values (if present in an implementation), + but note that the Standard makes no guarantees about them. + """ + + def is_nan(self) -> Column[Bool]: + """ + Check for nan entries. + + Returns + ------- + Column + + See also + -------- + is_null + + Notes + ----- + This only checks for 'NaN'. 
+ Does *not* include 'missing' or 'null' entries. + In particular, does not check for `np.timedelta64('NaT')`. + """ + + def is_in(self, values: Column[T]) -> Column[Bool]: + """ + Indicate whether the value at each row matches any value in `values`. + + Parameters + ---------- + values : Column + Contains values to compare against. May include ``float('nan')`` and + ``null``, in which case ``'nan'`` and ``null`` will + respectively return ``True`` even though ``float('nan') == float('nan')`` + isn't ``True``. + The dtype of ``values`` must match the current column's dtype. + + Returns + ------- + Column[bool] + """ + + def unique_indices(self, *, skip_nulls: bool = True) -> Column[IntDType]: + """ + Return indices corresponding to unique values in Column. + + Returns + ------- + Column[IntDType] + Indices corresponding to unique values. + + Notes + ----- + There are no ordering guarantees. In particular, if there are multiple + indices corresponding to the same unique value, there is no guarantee + about which one will appear in the result. + If the original Column contains multiple `'NaN'` values, then + only a single index corresponding to those values should be returned. + Likewise for null values (if ``skip_nulls=False``). + To get the unique values, you can do ``col.get_rows(col.unique_indices())``. + """ + ... + + def fill_nan(self, value: float | 'null', /) -> Column[T]: + """ + Fill floating point ``nan`` values with the given fill value. + + Parameters + ---------- + value : float or `null` + Value used to replace any ``nan`` in the column with. Must be + of the Python scalar type matching the dtype of the column (or + be `null`). + + """ + ... diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py new file mode 100644 index 00000000..14a1f29c --- /dev/null +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -0,0 +1,756 @@ +from __future__ import annotations + +from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn, TypeVar, Generic + + +if TYPE_CHECKING: + from .column_object import Column + from .groupby_object import GroupBy + from . import DType, IntDType, FloatDType, Bool, null, Scalar + + +__all__ = ["DataFrame"] + +T = TypeVar("T", bound=DType) + + +class DataFrame(Generic[T]): + """ + DataFrame object + + Note that this dataframe object is not meant to be instantiated directly by + users of the library implementing the dataframe API standard. Rather, use + constructor functions or an already-created dataframe object retrieved via + + **Python operator support** + + All arithmetic operators defined by the Python language, except for + ``__matmul__``, ``__neg__`` and ``__pos__``, must be supported for + numerical data types. + + All comparison operators defined by the Python language must be supported + by the dataframe object for all data types for which those comparisons are + supported by the builtin scalar types corresponding to a data type. + + In-place operators must not be supported. All operations on the dataframe + object are out-of-place. + + **Methods and Attributes** + + """ + def __dataframe_namespace__( + self, /, *, api_version: str | None = None + ) -> Any: + """ + Returns an object that has all the dataframe API functions on it. + + Parameters + ---------- + api_version: Optional[str] + String representing the version of the dataframe API specification + to be returned, in ``'YYYY.MM'`` form, for example, ``'2023.04'``. 
+ If it is ``None``, it should return the namespace corresponding to + latest version of the dataframe API specification. If the given + version is invalid or not implemented for the given module, an + error should be raised. Default: ``None``. + + Returns + ------- + namespace: Any + An object representing the dataframe API namespace. It should have + every top-level function defined in the specification as an + attribute. It may contain other public names as well, but it is + recommended to only include those names that are part of the + specification. + + """ + + @property + def dataframe(self) -> object: + """ + Return underlying (not-necessarily-Standard-compliant) DataFrame. + + If a library only implements the Standard, then this can return `self`. + """ + ... + + def shape(self) -> tuple[int, int]: + """ + Return number of rows and number of columns. + """ + + def groupby(self, keys: Sequence[str], /) -> GroupBy: + """ + Group the DataFrame by the given columns. + + Parameters + ---------- + keys : Sequence[str] + + Returns + ------- + GroupBy + + Raises + ------ + KeyError + If any of the requested keys are not present. + + Notes + ----- + Downstream operations from this function, like aggregations, return + results for which row order is not guaranteed and is implementation + defined. + """ + ... + + def get_column_by_name(self, name: str, /) -> Column[T]: + """ + Select a column by name. + + Parameters + ---------- + name : str + + Returns + ------- + Column + + Raises + ------ + KeyError + If the key is not present. + """ + ... + + def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame[T]: + """ + Select multiple columns by name. + + Parameters + ---------- + names : Sequence[str] + + Returns + ------- + DataFrame + + Raises + ------ + KeyError + If the any requested key is not present. + """ + ... + + def get_rows(self, indices: Column[IntDType]) -> DataFrame[T]: + """ + Select a subset of rows, similar to `ndarray.take`. + + Parameters + ---------- + indices : Column[IntDType] + Positions of rows to select. + + Returns + ------- + DataFrame + """ + ... + + def slice_rows( + self, start: int | None, stop: int | None, step: int | None + ) -> DataFrame[T]: + """ + Select a subset of rows corresponding to a slice. + + Parameters + ---------- + start : int or None + stop : int or None + step : int or None + + Returns + ------- + DataFrame + """ + ... + + def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame[T]: + """ + Select a subset of rows corresponding to a mask. + + Parameters + ---------- + mask : Column[bool] + + Returns + ------- + DataFrame + + Notes + ----- + Some participants preferred a weaker type Arraylike[bool] for mask, + where 'Arraylike' denotes an object adhering to the Array API standard. + """ + ... + + def insert(self, loc: int, label: str, value: Column[Any]) -> DataFrame[Any]: + """ + Insert column into DataFrame at specified location. + + Parameters + ---------- + loc : int + Insertion index. Must verify 0 <= loc <= len(columns). + label : str + Label of the inserted column. + value : Column + """ + ... + + def drop_column(self, label: str) -> DataFrame[T]: + """ + Drop the specified column. + + Parameters + ---------- + label : str + + Returns + ------- + DataFrame + + Raises + ------ + KeyError + If the label is not present. + """ + ... + + def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame[T]: + """ + Rename columns. 
+ + Parameters + ---------- + mapping : Mapping[str, str] + Keys are old column names, values are new column names. + + Returns + ------- + DataFrame + """ + ... + + def get_column_names(self) -> Sequence[str]: + """ + Get column names. + + Returns + ------- + Sequence[str] + """ + ... + + def sorted_indices( + self, + keys: Sequence[str], + *, + ascending: Sequence[bool] | bool = True, + nulls_position: Literal['first', 'last'] = 'last', + ) -> Column[IntDType]: + """ + Return row numbers which would sort according to given columns. + + If you need to sort the DataFrame, you can simply do:: + + df.get_rows(df.sorted_indices(keys)) + + Parameters + ---------- + keys : Sequence[str] + Names of columns to sort by. + ascending : Sequence[bool] or bool + If `True`, sort by all keys in ascending order. + If `False`, sort by all keys in descending order. + If a sequence, it must be the same length as `keys`, + and determines the direction with which to use each + key to sort by. + nulls_position : ``{'first', 'last'}`` + Whether null values should be placed at the beginning + or at the end of the result. + Note that the position of NaNs is unspecified and may + vary based on the implementation. + + Returns + ------- + Column[IntDType] + + Raises + ------ + ValueError + If `keys` and `ascending` are sequences of different lengths. + """ + ... + + def __eq__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # type: ignore[override] + """ + Compare for equality. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __ne__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # type: ignore[override] + """ + Compare for non-equality. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __ge__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + """ + Compare for "greater than or equal to" `other`. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __gt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + """ + Compare for "greater than" `other`. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __le__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + """ + Compare for "less than or equal to" `other`. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... 
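+
+    # Illustrative, non-normative sketch: the comparison operators return a
+    # boolean DataFrame, which can then drive the row-wise reductions and
+    # masking defined further below, e.g.
+    #
+    #     mask = (df >= 0).all_rowwise()
+    #     non_negative_rows = df.get_rows_by_mask(mask)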
+ + def __lt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + """ + Compare for "less than" `other`. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __and__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: + """ + Apply logical 'and' to `other` DataFrame (or scalar) and this dataframe. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : DataFrame[bool] or bool + If DataFrame, must have same length. + + Returns + ------- + DataFrame[bool] + + Raises + ------ + ValueError + If `self` or `other` is not boolean. + """ + + def __or__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: + """ + Apply logical 'or' to `other` DataFrame (or scalar) and this DataFrame. + + Nulls should follow Kleene Logic. + + Parameters + ---------- + other : DataFrame[bool] or bool + If DataFrame, must have same length. + + Returns + ------- + DataFrame[bool] + + Raises + ------ + ValueError + If `self` or `other` is not boolean. + """ + + def __add__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Add `other` dataframe or scalar to this dataframe. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __sub__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Subtract `other` dataframe or scalar from this dataframe. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __mul__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Multiply `other` dataframe or scalar with this dataframe. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __truediv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Divide this dataframe by `other` dataframe or scalar. True division, returns floats. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __floordiv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __pow__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Raise this dataframe to the power of `other`. 
+ + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __mod__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + """ + Return modulus of this dataframe by `other` (`%` operator). + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + DataFrame + """ + ... + + def __divmod__(self, other: DataFrame[Any] | Scalar[Any]) -> tuple[DataFrame[Any], DataFrame[Any]]: + """ + Return quotient and remainder of integer division. See `divmod` builtin function. + + Parameters + ---------- + other : DataFrame or Scalar + If DataFrame, must have same length and matching columns. + "Scalar" here is defined implicitly by what scalar types are allowed + for the operation by the underling dtypes. + + Returns + ------- + A tuple of two DataFrame's + """ + ... + + def __invert__(self: DataFrame[Bool]) -> DataFrame[Bool]: + """ + Invert truthiness of (boolean) elements. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def __iter__(self) -> NoReturn: + """ + Iterate over elements. + + This is intentionally "poisoned" to discourage inefficient code patterns. + + Raises + ------ + NotImplementedError + """ + raise NotImplementedError("'__iter__' is intentionally not implemented.") + + def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + """ + Reduction returns a 1-row DataFrame. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + """ + Reduction returns a 1-row DataFrame. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + """ + Reduction returns a Column. + + Differs from ``DataFrame.any`` and that the reduction happens + for each row, rather than for each column. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + """ + Reduction returns a Column. + + Differs from ``DataFrame.all`` and that the reduction happens + for each row, rather than for each column. + + Raises + ------ + ValueError + If any of the DataFrame's columns is not boolean. + """ + ... + + def min(self, *, skip_nulls: bool = True) -> DataFrame[T]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def max(self, *, skip_nulls: bool = True) -> DataFrame[T]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... 
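+
+    # Illustrative, non-normative sketch: each reduction returns a 1-row
+    # DataFrame, so a single aggregated value can be read back out through
+    # the Column API (the column name 'price' is only an example):
+    #
+    #     avg = df.mean().get_column_by_name('price').get_value(0)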
+ + def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + """ + Reduction returns a 1-row DataFrame. + """ + ... + + def is_null(self) -> DataFrame[Bool]: + """ + Check for 'missing' or 'null' entries. + + Returns + ------- + DataFrame + + See also + -------- + is_nan + + Notes + ----- + Does *not* include NaN-like entries. + May optionally include 'NaT' values (if present in an implementation), + but note that the Standard makes no guarantees about them. + """ + ... + + def is_nan(self) -> DataFrame[Bool]: + """ + Check for nan entries. + + Returns + ------- + DataFrame + + See also + -------- + is_null + + Notes + ----- + This only checks for 'NaN'. + Does *not* include 'missing' or 'null' entries. + In particular, does not check for `np.timedelta64('NaT')`. + """ + ... + + def fill_nan(self, value: float | 'null', /) -> DataFrame[T]: + """ + Fill ``nan`` values with the given fill value. + + The fill operation will apply to all columns with a floating-point + dtype. Other columns remain unchanged. + + Parameters + ---------- + value : float or `null` + Value used to replace any ``nan`` in the column with. Must be + of the Python scalar type matching the dtype of the column (or + be `null`). + + """ + ... diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py new file mode 100644 index 00000000..6ca0a600 --- /dev/null +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -0,0 +1,54 @@ +from typing import TYPE_CHECKING, TypeVar, Generic, Any + +if TYPE_CHECKING: + from .dataframe_object import DataFrame + from . import IntDType, DType, Bool + + +__all__ = ['GroupBy'] + +T = TypeVar('T', bound=DType) + + +class GroupBy: + """ + GroupBy object. + + Note that this class is not meant to be constructed by users. + It is returned from `DataFrame.groupby`. + + **Methods** + + """ + def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + ... + + def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + ... + + def min(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def max(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + ... + + def size(self) -> DataFrame[IntDType]: + ... From 69c32833bb06663b6bb939f457294dc4b1f664eb Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 10:41:15 +0100 Subject: [PATCH 08/38] fixup! --- spec/API_specification/dataframe_api/column_object.py | 2 +- spec/API_specification/dataframe_api/dataframe_object.py | 2 +- spec/API_specification/dataframe_api/groupby_object.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index a06c993a..71f14119 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -5,7 +5,7 @@ if TYPE_CHECKING: from . 
import DType, IntDType, FloatDType, Bool, null, Scalar -T = TypeVar('T', bound=DType) +T = TypeVar('T', bound="DType") __all__ = ['Column'] diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 14a1f29c..a920a7f3 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -11,7 +11,7 @@ __all__ = ["DataFrame"] -T = TypeVar("T", bound=DType) +T = TypeVar("T", bound="DType") class DataFrame(Generic[T]): diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index 6ca0a600..eb06f27d 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import TYPE_CHECKING, TypeVar, Generic, Any if TYPE_CHECKING: @@ -7,7 +9,7 @@ __all__ = ['GroupBy'] -T = TypeVar('T', bound=DType) +T = TypeVar('T', bound="DType") class GroupBy: From 08f085ea6186c5d319fc1c2ca798fced2cf40468 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 10:48:05 +0100 Subject: [PATCH 09/38] fixup some types --- .../dataframe_api/column_object.py | 20 +++++++++---------- .../dataframe_api/dataframe_object.py | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 71f14119..58e748bd 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -137,7 +137,7 @@ def sorted_indices( """ ... - def __eq__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: # type: ignore[override] """ Compare for equality. @@ -155,7 +155,7 @@ def __eq__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: i Column """ - def __ne__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: ignore[override] + def __ne__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: # type: ignore[override] """ Compare for non-equality. @@ -173,7 +173,7 @@ def __ne__(self, other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: # type: i Column """ - def __ge__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __ge__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: """ Compare for "greater than or equal to" `other`. @@ -189,7 +189,7 @@ def __ge__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __gt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __gt__(self, other: Column[T] | Scalar[T]) -> Column[Any]: """ Compare for "greater than" `other`. @@ -205,7 +205,7 @@ def __gt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __le__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __le__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: """ Compare for "less than or equal to" `other`. @@ -221,7 +221,7 @@ def __le__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __lt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __lt__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: """ Compare for "less than" `other`. 
@@ -237,7 +237,7 @@ def __lt__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __and__(self, other: Column[Bool] | bool) -> Column[Bool]: + def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: """ Apply logical 'and' to `other` Column (or scalar) and this Column. @@ -258,7 +258,7 @@ def __and__(self, other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __or__(self, other: Column[Bool] | bool) -> Column[Bool]: + def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: """ Apply logical 'or' to `other` Column (or scalar) and this column. @@ -343,7 +343,7 @@ def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[T]: """ Floor-divide `other` column or scalar to this column. @@ -359,7 +359,7 @@ def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[T]: """ Raise this column to the power of `other`. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index a920a7f3..dce657a7 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -293,7 +293,7 @@ def sorted_indices( """ ... - def __eq__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # type: ignore[override] + def __eq__(self, other: DataFrame[T] | Scalar[T]) -> DataFrame[Bool]: # type: ignore[override] """ Compare for equality. @@ -312,7 +312,7 @@ def __eq__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # typ """ ... - def __ne__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: # type: ignore[override] + def __ne__(self, other: DataFrame[T] | Scalar[T]) -> DataFrame[Bool]: # type: ignore[override] """ Compare for non-equality. From 4465e674b8e9be9b4af3d8d2d8d8408678bf5a6b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 10:55:48 +0100 Subject: [PATCH 10/38] some corrections --- .../dataframe_api/__init__.py | 2 +- .../dataframe_api/column_object.py | 44 +++++++++---------- .../dataframe_api/dataframe_object.py | 24 +++++----- .../dataframe_api/groupby_object.py | 20 ++++----- spec/design_topics/python_builtin_types.md | 2 +- 5 files changed, 46 insertions(+), 46 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 1aa4e945..da5b4312 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -124,7 +124,7 @@ class null: """ ... -def is_null(value: object, /) -> bool: +def is_null(value: object, /) -> Scalar[Bool]: """ Check if an object is a `null` scalar. diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 58e748bd..3eafd194 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -90,7 +90,7 @@ def get_rows(self, indices: Column[IntDType]) -> Column[T]: """ ... 
- def get_value(self, row_number: int) -> Scalar[T]: + def get_value(self, row_number: Scalar[IntDType]) -> Scalar[T]: """ Select the value at a row number, similar to `ndarray.__getitem__()`. @@ -110,7 +110,7 @@ def get_value(self, row_number: int) -> Scalar[T]: def sorted_indices( self, *, - ascending: bool = True, + ascending: Scalar[Bool] = True, nulls_position: Literal['first', 'last'] = 'last', ) -> Column[IntDType]: """ @@ -122,7 +122,7 @@ def sorted_indices( Parameters ---------- - ascending : bool + ascending : Scalar[Bool] If `True`, sort in ascending order. If `False`, sort in descending order. nulls_position : ``{'first', 'last'}`` @@ -189,7 +189,7 @@ def __ge__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: Column """ - def __gt__(self, other: Column[T] | Scalar[T]) -> Column[Any]: + def __gt__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: """ Compare for "greater than" `other`. @@ -279,7 +279,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __add__(self, other: Column[T] | Scalar[T]) -> Column[T]: """ Add `other` column or scalar to this column. @@ -295,7 +295,7 @@ def __add__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __sub__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __sub__(self, other: Column[T] | Scalar[T]) -> Column[T]: """ Subtract `other` column or scalar from this column. @@ -327,7 +327,7 @@ def __mul__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[FloatDType]: """ Divide this column by `other` column or scalar. True division, returns floats. @@ -343,7 +343,7 @@ def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[T]: + def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[IntDType]: """ Floor-divide `other` column or scalar to this column. @@ -359,7 +359,7 @@ def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[T]: Column """ - def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[T]: + def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: """ Raise this column to the power of `other`. @@ -391,7 +391,7 @@ def __mod__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __divmod__(self, other: Column[Any] | Scalar[Any]) -> tuple[Column[IntDType], Column[IntDType]]: + def __divmod__(self, other: Column[Any] | Scalar[Any]) -> tuple[Column[IntDType], Column[FloatDType]]: """ Return quotient and remainder of integer division. See `divmod` builtin function. @@ -407,7 +407,7 @@ def __divmod__(self, other: Column[Any] | Scalar[Any]) -> tuple[Column[IntDType] Column """ - def __invert__(self) -> Column[Bool]: + def __invert__(self: Column[Bool]) -> Column[Bool]: """ Invert truthiness of (boolean) elements. @@ -417,7 +417,7 @@ def __invert__(self) -> Column[Bool]: If any of the Column's columns is not boolean. """ - def any(self, *, skip_nulls: bool = True) -> bool: + def any(self: Column[Bool], *, skip_nulls: Scalar[Bool] = True) -> Scalar[Bool]: """ Reduction returns a bool. @@ -427,7 +427,7 @@ def any(self, *, skip_nulls: bool = True) -> bool: If column is not boolean. 
""" - def all(self, *, skip_nulls: bool = True) -> bool: + def all(self: Column[Bool], *, skip_nulls: Scalar[Bool] = True) -> Scalar[Bool]: """ Reduction returns a bool. @@ -437,32 +437,32 @@ def all(self, *, skip_nulls: bool = True) -> bool: If column is not boolean. """ - def min(self, *, skip_nulls: bool = True) -> Scalar[T]: + def min(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[T]: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def max(self, *, skip_nulls: bool = True) -> Scalar[T]: + def max(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[T]: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def sum(self, *, skip_nulls: bool = True) -> Scalar[T]: + def sum(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[T]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def prod(self, *, skip_nulls: bool = True) -> Scalar[T]: + def prod(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical data types. The returned value has the same dtype as the column. """ - def median(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def median(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -470,7 +470,7 @@ def median(self, *, skip_nulls: bool = True) -> Scalar[Any]: dtypes. """ - def mean(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def mean(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -478,7 +478,7 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar[Any]: dtypes. """ - def std(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def std(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -486,7 +486,7 @@ def std(self, *, skip_nulls: bool = True) -> Scalar[Any]: dtypes. """ - def var(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def var(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -550,7 +550,7 @@ def is_in(self, values: Column[T]) -> Column[Bool]: Column[bool] """ - def unique_indices(self, *, skip_nulls: bool = True) -> Column[IntDType]: + def unique_indices(self, *, skip_nulls: Scalar[Bool] = True) -> Column[IntDType]: """ Return indices corresponding to unique values in Column. 
diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index dce657a7..8e9cbc73 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -600,7 +600,7 @@ def __iter__(self) -> NoReturn: """ raise NotImplementedError("'__iter__' is intentionally not implemented.") - def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + def any(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: """ Reduction returns a 1-row DataFrame. @@ -611,7 +611,7 @@ def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: """ ... - def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + def all(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: """ Reduction returns a 1-row DataFrame. @@ -622,7 +622,7 @@ def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: """ ... - def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + def any_rowwise(self, *, skip_nulls: Scalar[Bool] = True) -> Column[Bool]: """ Reduction returns a Column. @@ -636,7 +636,7 @@ def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: """ ... - def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + def all_rowwise(self, *, skip_nulls: Scalar[Bool] = True) -> Column[Bool]: """ Reduction returns a Column. @@ -650,49 +650,49 @@ def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: """ ... - def min(self, *, skip_nulls: bool = True) -> DataFrame[T]: + def min(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[T]: """ Reduction returns a 1-row DataFrame. """ ... - def max(self, *, skip_nulls: bool = True) -> DataFrame[T]: + def max(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[T]: """ Reduction returns a 1-row DataFrame. """ ... - def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def sum(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def prod(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def median(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def mean(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def std(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def var(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index eb06f27d..f6e845ad 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -22,34 +22,34 @@ class GroupBy: **Methods** """ - def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + def any(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: ... 
- def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + def all(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: ... - def min(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def min(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: ... - def max(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def max(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: ... - def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def sum(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: ... - def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def prod(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: ... - def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def median(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: ... - def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def mean(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: ... - def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def std(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: ... - def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def var(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: ... def size(self) -> DataFrame[IntDType]: diff --git a/spec/design_topics/python_builtin_types.md b/spec/design_topics/python_builtin_types.md index 567baca3..04194bf7 100644 --- a/spec/design_topics/python_builtin_types.md +++ b/spec/design_topics/python_builtin_types.md @@ -18,7 +18,7 @@ class DataFrame: ... class Column: - def mean(self, skip_nulls: bool = True) -> float: + def mean(self, skip_nulls: Scalar[Bool] = True) -> float: ... larger = df2 > df1.get_column_by_name('foo').mean() From 51f542550cbd164d947f94aa1eac2142cc27878e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:01:41 +0100 Subject: [PATCH 11/38] fixup again --- spec/API_specification/.mypy.ini | 3 +++ .../dataframe_api/column_object.py | 22 ++++++++--------- .../dataframe_api/dataframe_object.py | 24 +++++++++---------- .../dataframe_api/groupby_object.py | 20 ++++++++-------- spec/design_topics/python_builtin_types.md | 2 +- 5 files changed, 37 insertions(+), 34 deletions(-) create mode 100644 spec/API_specification/.mypy.ini diff --git a/spec/API_specification/.mypy.ini b/spec/API_specification/.mypy.ini new file mode 100644 index 00000000..eef0ed08 --- /dev/null +++ b/spec/API_specification/.mypy.ini @@ -0,0 +1,3 @@ +[mypy] +strict=True +disable_error_code=empty-body diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 3eafd194..cd574f81 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -417,7 +417,7 @@ def __invert__(self: Column[Bool]) -> Column[Bool]: If any of the Column's columns is not boolean. """ - def any(self: Column[Bool], *, skip_nulls: Scalar[Bool] = True) -> Scalar[Bool]: + def any(self: Column[Bool], *, skip_nulls: bool = True) -> Scalar[Bool]: """ Reduction returns a bool. @@ -427,7 +427,7 @@ def any(self: Column[Bool], *, skip_nulls: Scalar[Bool] = True) -> Scalar[Bool]: If column is not boolean. """ - def all(self: Column[Bool], *, skip_nulls: Scalar[Bool] = True) -> Scalar[Bool]: + def all(self: Column[Bool], *, skip_nulls: bool = True) -> Scalar[Bool]: """ Reduction returns a bool. 
@@ -437,32 +437,32 @@ def all(self: Column[Bool], *, skip_nulls: Scalar[Bool] = True) -> Scalar[Bool]: If column is not boolean. """ - def min(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[T]: + def min(self, *, skip_nulls: bool = True) -> Scalar[T]: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def max(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[T]: + def max(self, *, skip_nulls: bool = True) -> Scalar[T]: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def sum(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[T]: + def sum(self, *, skip_nulls: bool = True) -> Scalar[T]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def prod(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: + def prod(self, *, skip_nulls: bool = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical data types. The returned value has the same dtype as the column. """ - def median(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: + def median(self, *, skip_nulls: bool = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -470,7 +470,7 @@ def median(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: dtypes. """ - def mean(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: + def mean(self, *, skip_nulls: bool = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -478,7 +478,7 @@ def mean(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: dtypes. """ - def std(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: + def std(self, *, skip_nulls: bool = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -486,7 +486,7 @@ def std(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: dtypes. """ - def var(self, *, skip_nulls: Scalar[Bool] = True) -> Scalar[Any]: + def var(self, *, skip_nulls: bool = True) -> Scalar[Any]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -550,7 +550,7 @@ def is_in(self, values: Column[T]) -> Column[Bool]: Column[bool] """ - def unique_indices(self, *, skip_nulls: Scalar[Bool] = True) -> Column[IntDType]: + def unique_indices(self, *, skip_nulls: bool = True) -> Column[IntDType]: """ Return indices corresponding to unique values in Column. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 8e9cbc73..dce657a7 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -600,7 +600,7 @@ def __iter__(self) -> NoReturn: """ raise NotImplementedError("'__iter__' is intentionally not implemented.") - def any(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: + def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: """ Reduction returns a 1-row DataFrame. 
@@ -611,7 +611,7 @@ def any(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: """ ... - def all(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: + def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: """ Reduction returns a 1-row DataFrame. @@ -622,7 +622,7 @@ def all(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: """ ... - def any_rowwise(self, *, skip_nulls: Scalar[Bool] = True) -> Column[Bool]: + def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: """ Reduction returns a Column. @@ -636,7 +636,7 @@ def any_rowwise(self, *, skip_nulls: Scalar[Bool] = True) -> Column[Bool]: """ ... - def all_rowwise(self, *, skip_nulls: Scalar[Bool] = True) -> Column[Bool]: + def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: """ Reduction returns a Column. @@ -650,49 +650,49 @@ def all_rowwise(self, *, skip_nulls: Scalar[Bool] = True) -> Column[Bool]: """ ... - def min(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[T]: + def min(self, *, skip_nulls: bool = True) -> DataFrame[T]: """ Reduction returns a 1-row DataFrame. """ ... - def max(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[T]: + def max(self, *, skip_nulls: bool = True) -> DataFrame[T]: """ Reduction returns a 1-row DataFrame. """ ... - def sum(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def prod(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def median(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def mean(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def std(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ ... - def var(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: """ Reduction returns a 1-row DataFrame. """ diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index f6e845ad..eb06f27d 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -22,34 +22,34 @@ class GroupBy: **Methods** """ - def any(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: + def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: ... - def all(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Bool]: + def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: ... - def min(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def min(self, *, skip_nulls: bool = True) -> DataFrame[Any]: ... - def max(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def max(self, *, skip_nulls: bool = True) -> DataFrame[Any]: ... - def sum(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: ... 
- def prod(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: ... - def median(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: ... - def mean(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: ... - def std(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: ... - def var(self, *, skip_nulls: Scalar[Bool] = True) -> DataFrame[Any]: + def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: ... def size(self) -> DataFrame[IntDType]: diff --git a/spec/design_topics/python_builtin_types.md b/spec/design_topics/python_builtin_types.md index 04194bf7..567baca3 100644 --- a/spec/design_topics/python_builtin_types.md +++ b/spec/design_topics/python_builtin_types.md @@ -18,7 +18,7 @@ class DataFrame: ... class Column: - def mean(self, skip_nulls: Scalar[Bool] = True) -> float: + def mean(self, skip_nulls: bool = True) -> float: ... larger = df2 > df1.get_column_by_name('foo').mean() From b654dadeef0f4e950faba68a3470fe22800a1565 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:16:05 +0100 Subject: [PATCH 12/38] wip --- .../dataframe_api/__init__.py | 5 ++- .../API_specification/dataframe_api/_types.py | 6 +++- .../dataframe_api/column_object.py | 35 +++++++++---------- .../dataframe_api/dataframe_object.py | 29 ++++++++------- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index da5b4312..7f0cc513 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -8,8 +8,7 @@ from .column_object import * from .dataframe_object import DataFrame from .groupby_object import * - -T = TypeVar("T", bound="DType") +from ._types import DTypeT __all__ = [ "__dataframe_api_version", @@ -191,5 +190,5 @@ class Bool(DType): # Scalar # ########## -class Scalar(Generic[T]): +class Scalar(Generic[DTypeT]): ... diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 2874ba4c..f01e6163 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -17,13 +17,17 @@ TypeVar, Union, Protocol, + TYPE_CHECKING, ) from enum import Enum +if TYPE_CHECKING: + from . import DType + array = TypeVar("array") Scalar = TypeVar("Scalar") device = TypeVar("device") -DType = TypeVar("DType") +DTypeT = TypeVar("DTypeT", bound="DType") SupportsDLPack = TypeVar("SupportsDLPack") SupportsBufferProtocol = TypeVar("SupportsBufferProtocol") PyCapsule = TypeVar("PyCapsule") diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index cd574f81..5cf4bbc1 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -4,13 +4,12 @@ if TYPE_CHECKING: from . import DType, IntDType, FloatDType, Bool, null, Scalar - -T = TypeVar('T', bound="DType") + from ._types import DTypeT __all__ = ['Column'] -class Column(Generic[T]): +class Column(Generic[DTypeT]): """ Column object @@ -79,7 +78,7 @@ def dtype(self) -> DType: Return data type of column. 
""" - def get_rows(self, indices: Column[IntDType]) -> Column[T]: + def get_rows(self, indices: Column[IntDType]) -> Column[DTypeT]: """ Select a subset of rows, similar to `ndarray.take`. @@ -90,7 +89,7 @@ def get_rows(self, indices: Column[IntDType]) -> Column[T]: """ ... - def get_value(self, row_number: Scalar[IntDType]) -> Scalar[T]: + def get_value(self, row_number: Scalar[IntDType]) -> Scalar[DTypeT]: """ Select the value at a row number, similar to `ndarray.__getitem__()`. @@ -137,7 +136,7 @@ def sorted_indices( """ ... - def __eq__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: # type: ignore[override] """ Compare for equality. @@ -155,7 +154,7 @@ def __eq__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: # type: ignore[ Column """ - def __ne__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: # type: ignore[override] + def __ne__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: # type: ignore[override] """ Compare for non-equality. @@ -173,7 +172,7 @@ def __ne__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: # type: ignore[ Column """ - def __ge__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: + def __ge__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: """ Compare for "greater than or equal to" `other`. @@ -189,7 +188,7 @@ def __ge__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: Column """ - def __gt__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: + def __gt__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: """ Compare for "greater than" `other`. @@ -205,7 +204,7 @@ def __gt__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: Column """ - def __le__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: + def __le__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: """ Compare for "less than or equal to" `other`. @@ -221,7 +220,7 @@ def __le__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: Column """ - def __lt__(self, other: Column[T] | Scalar[T]) -> Column[Bool]: + def __lt__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: """ Compare for "less than" `other`. @@ -279,7 +278,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self, other: Column[T] | Scalar[T]) -> Column[T]: + def __add__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[DTypeT]: """ Add `other` column or scalar to this column. @@ -295,7 +294,7 @@ def __add__(self, other: Column[T] | Scalar[T]) -> Column[T]: Column """ - def __sub__(self, other: Column[T] | Scalar[T]) -> Column[T]: + def __sub__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[DTypeT]: """ Subtract `other` column or scalar from this column. @@ -437,19 +436,19 @@ def all(self: Column[Bool], *, skip_nulls: bool = True) -> Scalar[Bool]: If column is not boolean. """ - def min(self, *, skip_nulls: bool = True) -> Scalar[T]: + def min(self, *, skip_nulls: bool = True) -> Scalar[DTypeT]: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def max(self, *, skip_nulls: bool = True) -> Scalar[T]: + def max(self, *, skip_nulls: bool = True) -> Scalar[DTypeT]: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. 
""" - def sum(self, *, skip_nulls: bool = True) -> Scalar[T]: + def sum(self, *, skip_nulls: bool = True) -> Scalar[DTypeT]: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. The returned value has the same dtype as the @@ -532,7 +531,7 @@ def is_nan(self) -> Column[Bool]: In particular, does not check for `np.timedelta64('NaT')`. """ - def is_in(self, values: Column[T]) -> Column[Bool]: + def is_in(self, values: Column[DTypeT]) -> Column[Bool]: """ Indicate whether the value at each row matches any value in `values`. @@ -571,7 +570,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[IntDType]: """ ... - def fill_nan(self, value: float | 'null', /) -> Column[T]: + def fill_nan(self, value: float | 'null', /) -> Column[DTypeT]: """ Fill floating point ``nan`` values with the given fill value. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index dce657a7..6d076499 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -7,14 +7,13 @@ from .column_object import Column from .groupby_object import GroupBy from . import DType, IntDType, FloatDType, Bool, null, Scalar + from ._types import DTypeT __all__ = ["DataFrame"] -T = TypeVar("T", bound="DType") - -class DataFrame(Generic[T]): +class DataFrame(Generic[DTypeT]): """ DataFrame object @@ -104,7 +103,7 @@ def groupby(self, keys: Sequence[str], /) -> GroupBy: """ ... - def get_column_by_name(self, name: str, /) -> Column[T]: + def get_column_by_name(self, name: str, /) -> Column[DTypeT]: """ Select a column by name. @@ -123,7 +122,7 @@ def get_column_by_name(self, name: str, /) -> Column[T]: """ ... - def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame[T]: + def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame[DTypeT]: """ Select multiple columns by name. @@ -142,7 +141,7 @@ def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame[T]: """ ... - def get_rows(self, indices: Column[IntDType]) -> DataFrame[T]: + def get_rows(self, indices: Column[IntDType]) -> DataFrame[DTypeT]: """ Select a subset of rows, similar to `ndarray.take`. @@ -159,7 +158,7 @@ def get_rows(self, indices: Column[IntDType]) -> DataFrame[T]: def slice_rows( self, start: int | None, stop: int | None, step: int | None - ) -> DataFrame[T]: + ) -> DataFrame[DTypeT]: """ Select a subset of rows corresponding to a slice. @@ -175,7 +174,7 @@ def slice_rows( """ ... - def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame[T]: + def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame[DTypeT]: """ Select a subset of rows corresponding to a mask. @@ -208,7 +207,7 @@ def insert(self, loc: int, label: str, value: Column[Any]) -> DataFrame[Any]: """ ... - def drop_column(self, label: str) -> DataFrame[T]: + def drop_column(self, label: str) -> DataFrame[DTypeT]: """ Drop the specified column. @@ -227,7 +226,7 @@ def drop_column(self, label: str) -> DataFrame[T]: """ ... - def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame[T]: + def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame[DTypeT]: """ Rename columns. @@ -293,7 +292,7 @@ def sorted_indices( """ ... - def __eq__(self, other: DataFrame[T] | Scalar[T]) -> DataFrame[Bool]: # type: ignore[override] + def __eq__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: # type: ignore[override] """ Compare for equality. 
@@ -312,7 +311,7 @@ def __eq__(self, other: DataFrame[T] | Scalar[T]) -> DataFrame[Bool]: # type: i """ ... - def __ne__(self, other: DataFrame[T] | Scalar[T]) -> DataFrame[Bool]: # type: ignore[override] + def __ne__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: # type: ignore[override] """ Compare for non-equality. @@ -650,13 +649,13 @@ def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: """ ... - def min(self, *, skip_nulls: bool = True) -> DataFrame[T]: + def min(self, *, skip_nulls: bool = True) -> DataFrame[DTypeT]: """ Reduction returns a 1-row DataFrame. """ ... - def max(self, *, skip_nulls: bool = True) -> DataFrame[T]: + def max(self, *, skip_nulls: bool = True) -> DataFrame[DTypeT]: """ Reduction returns a 1-row DataFrame. """ @@ -738,7 +737,7 @@ def is_nan(self) -> DataFrame[Bool]: """ ... - def fill_nan(self, value: float | 'null', /) -> DataFrame[T]: + def fill_nan(self, value: float | 'null', /) -> DataFrame[DTypeT]: """ Fill ``nan`` values with the given fill value. From a096091902ce35f4b39b901a084e14cfed2c1535 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:18:40 +0100 Subject: [PATCH 13/38] getting there? --- spec/API_specification/dataframe_api/__init__.py | 1 + spec/API_specification/dataframe_api/column_object.py | 3 ++- spec/API_specification/dataframe_api/dataframe_object.py | 3 ++- spec/API_specification/index.rst | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 7f0cc513..bf5ebdd9 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -29,6 +29,7 @@ "Float64", "Float32", "Bool", + "DTypeT", ] diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 5cf4bbc1..69d8cb30 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -4,7 +4,8 @@ if TYPE_CHECKING: from . import DType, IntDType, FloatDType, Bool, null, Scalar - from ._types import DTypeT + +from ._types import DTypeT __all__ = ['Column'] diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 6d076499..729a57c4 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -2,12 +2,13 @@ from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn, TypeVar, Generic +from ._types import DTypeT if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy from . import DType, IntDType, FloatDType, Bool, null, Scalar - from ._types import DTypeT + __all__ = ["DataFrame"] diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index 32b81a12..100659f6 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -28,6 +28,7 @@ of objects and functions in the top-level namespace. 
The latter are: Float64 Float32 Bool + DTypeT The ``DataFrame``, ``Column`` and ``GroupBy`` objects have the following methods and attributes: From b3847d0743cc424cedcf1351a6363979eeb789e9 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:19:57 +0100 Subject: [PATCH 14/38] fixup --- spec/API_specification/dataframe_api/column_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 69d8cb30..2929539f 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -110,7 +110,7 @@ def get_value(self, row_number: Scalar[IntDType]) -> Scalar[DTypeT]: def sorted_indices( self, *, - ascending: Scalar[Bool] = True, + ascending: bool = True, nulls_position: Literal['first', 'last'] = 'last', ) -> Column[IntDType]: """ From ffcd9e0cdecf3984a80bbb98513635b241bebdb4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:27:04 +0100 Subject: [PATCH 15/38] getting there? --- spec/API_specification/dataframe_api/__init__.py | 2 ++ spec/API_specification/index.rst | 2 ++ spec/conf.py | 1 + 3 files changed, 5 insertions(+) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index bf5ebdd9..f0c46353 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -30,6 +30,8 @@ "Float32", "Bool", "DTypeT", + "FloatDType", + "IntDType", ] diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index 100659f6..165266a4 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -29,6 +29,8 @@ of objects and functions in the top-level namespace. The latter are: Float32 Bool DTypeT + IntDType + FloatDType The ``DataFrame``, ``Column`` and ``GroupBy`` objects have the following methods and attributes: diff --git a/spec/conf.py b/spec/conf.py index 8d3d7800..8b002d06 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -82,6 +82,7 @@ ('py:class', 'enum.Enum'), ('py:class', 'ellipsis'), ('py:class', 'Scalar'), + ('py:class', 'DTypeT'), ] # NOTE: this alias handling isn't used yet - added in anticipation of future # need based on dataframe API aliases. 
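A note on the `DTypeT` plumbing introduced and exported over the last few patches: the pattern used in `_types.py` gives the type variable a real bound for the checker while keeping the stub package free of runtime import cycles. A condensed standalone illustration follows; the absolute `dataframe_api` import path is assumed here, whereas the stubs themselves use a relative import:

```python
from __future__ import annotations

from typing import TYPE_CHECKING, TypeVar

if TYPE_CHECKING:
    # Seen only by the type checker, so evaluating this module at runtime
    # never imports (or cycles back into) the dataframe_api package.
    from dataframe_api import DType

# The bound is given as a string, so it is not evaluated at runtime; mypy
# still only solves DTypeT to DType and its subclasses (Int64, Bool, ...).
DTypeT = TypeVar("DTypeT", bound="DType")
```

`Column` and `DataFrame` can then be parametrised over this shared `DTypeT` instead of each module defining its own local `T = TypeVar("T", bound="DType")`, which is what the preceding diffs do.
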
From a474d59a8003bd0959fbac522937bed0236a0f3b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:37:39 +0100 Subject: [PATCH 16/38] export DataFrame and Column --- spec/API_specification/dataframe_api/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index f0c46353..1db776c5 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -11,6 +11,8 @@ from ._types import DTypeT __all__ = [ + "DataFrame", + "Column", "__dataframe_api_version", "column_from_sequence", "concat", From 38259abfdfbd51c2f055af42f437541eb487973b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:40:20 +0100 Subject: [PATCH 17/38] ignore some nitpicks for now --- spec/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spec/conf.py b/spec/conf.py index 8b002d06..da73887b 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -83,6 +83,9 @@ ('py:class', 'ellipsis'), ('py:class', 'Scalar'), ('py:class', 'DTypeT'), + ('py:class', 'Bool'), + ('py:class', 'IntDType'), + ('py:class', 'FloatDType'), ] # NOTE: this alias handling isn't used yet - added in anticipation of future # need based on dataframe API aliases. From a4d81fc337340772e07b26775c22d6fc4fe3508f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 12:01:58 +0100 Subject: [PATCH 18/38] more fixups --- spec/API_specification/dataframe_api/column_object.py | 6 +++--- spec/API_specification/dataframe_api/dataframe_object.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 2929539f..b24ef339 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -48,7 +48,7 @@ def __column_namespace__( """ @property - def column(self) -> object: + def column(self) -> Any: """ Return underlying (not-necessarily-Standard-compliant) column. @@ -237,7 +237,7 @@ def __lt__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: Column """ - def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: + def __and__(self: Column[Bool], other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: """ Apply logical 'and' to `other` Column (or scalar) and this Column. @@ -258,7 +258,7 @@ def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: + def __or__(self: Column[Bool], other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: """ Apply logical 'or' to `other` Column (or scalar) and this column. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 729a57c4..8ba39d6c 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -399,7 +399,7 @@ def __lt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: """ ... 
- def __and__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: + def __and__(self, other: DataFrame[Bool] | Scalar[Bool]) -> DataFrame[Bool]: """ Apply logical 'and' to `other` DataFrame (or scalar) and this dataframe. @@ -420,7 +420,7 @@ def __and__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: If `self` or `other` is not boolean. """ - def __or__(self, other: DataFrame[Bool] | bool) -> DataFrame[Bool]: + def __or__(self, other: DataFrame[Bool] | Scalar[Bool]) -> DataFrame[Bool]: """ Apply logical 'or' to `other` DataFrame (or scalar) and this DataFrame. From cb2f1e4515b010874155ade5c647ca38f070c89a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 12:03:08 +0100 Subject: [PATCH 19/38] remove unnecessary file --- spec/API_specification/README.md | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 spec/API_specification/README.md diff --git a/spec/API_specification/README.md b/spec/API_specification/README.md deleted file mode 100644 index 0f2a16db..00000000 --- a/spec/API_specification/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# API Specification - -To type-check the spec, please install `mypy==1.4.0` and run - -```console -mypy dataframe_api --strict -``` \ No newline at end of file From b7c00fb983c0e69d37ad16eee069695ab8273665 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 14:03:09 +0100 Subject: [PATCH 20/38] few more corrections --- spec/API_specification/.mypy.ini | 2 +- spec/API_specification/dataframe_api/__init__.py | 2 +- spec/API_specification/dataframe_api/dataframe_object.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/spec/API_specification/.mypy.ini b/spec/API_specification/.mypy.ini index eef0ed08..a0e568d5 100644 --- a/spec/API_specification/.mypy.ini +++ b/spec/API_specification/.mypy.ini @@ -1,3 +1,3 @@ -[mypy] +[mypy-dataframe_api.*] strict=True disable_error_code=empty-body diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 1db776c5..0c57adda 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -64,7 +64,7 @@ def concat(dataframes: Sequence[DataFrame[Any]]) -> DataFrame[Any]: """ ... -def column_from_sequence(sequence: Sequence[Scalar[DType]], *, dtype: DType) -> Column[DType]: +def column_from_sequence(sequence: Sequence[Scalar[DType]], *, dtype: DType) -> Column[DTypeT]: """ Construct Column from sequence of elements. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 8ba39d6c..e9c98487 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -331,7 +331,7 @@ def __ne__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ ... - def __ge__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + def __ge__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ Compare for "greater than or equal to" `other`. @@ -348,7 +348,7 @@ def __ge__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: """ ... - def __gt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + def __gt__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ Compare for "greater than" `other`. 
@@ -365,7 +365,7 @@ def __gt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: """ ... - def __le__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + def __le__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ Compare for "less than or equal to" `other`. @@ -382,7 +382,7 @@ def __le__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: """ ... - def __lt__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Bool]: + def __lt__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ Compare for "less than" `other`. From cfaafa5d4cde5c964a0687d575de0456ad46805c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 24 Jun 2023 14:14:41 +0100 Subject: [PATCH 21/38] use mypy.ini --- .github/workflows/mypy.yml | 2 +- spec/API_specification/.mypy.ini | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 4c7f436b..a11bde7a 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -29,4 +29,4 @@ jobs: - name: install-reqs run: python -m pip install --upgrade mypy==1.4.0 - name: run mypy - run: cd spec/API_specification && mypy dataframe_api --strict --disable-error-code=empty-body + run: cd spec/API_specification && mypy dataframe_api diff --git a/spec/API_specification/.mypy.ini b/spec/API_specification/.mypy.ini index a0e568d5..b165602a 100644 --- a/spec/API_specification/.mypy.ini +++ b/spec/API_specification/.mypy.ini @@ -1,3 +1,5 @@ -[mypy-dataframe_api.*] +[mypy] strict=True + +[mypy-dataframe_api.*] disable_error_code=empty-body From e92a784f336447ef52a8ac844a6b34c1af48129f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 27 Jun 2023 12:50:05 +0100 Subject: [PATCH 22/38] revert making DataFrame generic --- .../dataframe_api/__init__.py | 4 +- .../dataframe_api/dataframe_object.py | 76 +++++++++---------- .../dataframe_api/groupby_object.py | 22 +++--- 3 files changed, 51 insertions(+), 51 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 0c57adda..2de0f599 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -44,7 +44,7 @@ implementation of the dataframe API standard. """ -def concat(dataframes: Sequence[DataFrame[Any]]) -> DataFrame[Any]: +def concat(dataframes: Sequence[DataFrame]) -> DataFrame: """ Concatenate DataFrames vertically. @@ -83,7 +83,7 @@ def column_from_sequence(sequence: Sequence[Scalar[DType]], *, dtype: DType) -> """ ... -def dataframe_from_dict(data: Mapping[str, Column[Any]]) -> DataFrame[Any]: +def dataframe_from_dict(data: Mapping[str, Column[Any]]) -> DataFrame: """ Construct DataFrame from map of column names to Columns. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index e9c98487..51c68fc7 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -14,7 +14,7 @@ __all__ = ["DataFrame"] -class DataFrame(Generic[DTypeT]): +class DataFrame: """ DataFrame object @@ -123,7 +123,7 @@ def get_column_by_name(self, name: str, /) -> Column[DTypeT]: """ ... 
- def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame[DTypeT]: + def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame: """ Select multiple columns by name. @@ -142,7 +142,7 @@ def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame[DTypeT]: """ ... - def get_rows(self, indices: Column[IntDType]) -> DataFrame[DTypeT]: + def get_rows(self, indices: Column[IntDType]) -> DataFrame: """ Select a subset of rows, similar to `ndarray.take`. @@ -159,7 +159,7 @@ def get_rows(self, indices: Column[IntDType]) -> DataFrame[DTypeT]: def slice_rows( self, start: int | None, stop: int | None, step: int | None - ) -> DataFrame[DTypeT]: + ) -> DataFrame: """ Select a subset of rows corresponding to a slice. @@ -175,7 +175,7 @@ def slice_rows( """ ... - def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame[DTypeT]: + def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame: """ Select a subset of rows corresponding to a mask. @@ -194,7 +194,7 @@ def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame[DTypeT]: """ ... - def insert(self, loc: int, label: str, value: Column[Any]) -> DataFrame[Any]: + def insert(self, loc: int, label: str, value: Column[Any]) -> DataFrame: """ Insert column into DataFrame at specified location. @@ -208,7 +208,7 @@ def insert(self, loc: int, label: str, value: Column[Any]) -> DataFrame[Any]: """ ... - def drop_column(self, label: str) -> DataFrame[DTypeT]: + def drop_column(self, label: str) -> DataFrame: """ Drop the specified column. @@ -227,7 +227,7 @@ def drop_column(self, label: str) -> DataFrame[DTypeT]: """ ... - def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame[DTypeT]: + def rename_columns(self, mapping: Mapping[str, str]) -> DataFrame: """ Rename columns. @@ -293,7 +293,7 @@ def sorted_indices( """ ... - def __eq__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: # type: ignore[override] + def __eq__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: # type: ignore[override] """ Compare for equality. @@ -312,7 +312,7 @@ def __eq__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ ... - def __ne__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: # type: ignore[override] + def __ne__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: # type: ignore[override] """ Compare for non-equality. @@ -331,7 +331,7 @@ def __ne__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ ... - def __ge__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: + def __ge__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: """ Compare for "greater than or equal to" `other`. @@ -348,7 +348,7 @@ def __ge__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ ... - def __gt__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: + def __gt__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: """ Compare for "greater than" `other`. @@ -365,7 +365,7 @@ def __gt__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ ... - def __le__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: + def __le__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: """ Compare for "less than or equal to" `other`. @@ -382,7 +382,7 @@ def __le__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ ... 
- def __lt__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: + def __lt__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: """ Compare for "less than" `other`. @@ -399,7 +399,7 @@ def __lt__(self, other: DataFrame[DTypeT] | Scalar[DTypeT]) -> DataFrame[Bool]: """ ... - def __and__(self, other: DataFrame[Bool] | Scalar[Bool]) -> DataFrame[Bool]: + def __and__(self, other: DataFrame | Scalar[Bool]) -> DataFrame: """ Apply logical 'and' to `other` DataFrame (or scalar) and this dataframe. @@ -420,7 +420,7 @@ def __and__(self, other: DataFrame[Bool] | Scalar[Bool]) -> DataFrame[Bool]: If `self` or `other` is not boolean. """ - def __or__(self, other: DataFrame[Bool] | Scalar[Bool]) -> DataFrame[Bool]: + def __or__(self, other: DataFrame | Scalar[Bool]) -> DataFrame: """ Apply logical 'or' to `other` DataFrame (or scalar) and this DataFrame. @@ -441,7 +441,7 @@ def __or__(self, other: DataFrame[Bool] | Scalar[Bool]) -> DataFrame[Bool]: If `self` or `other` is not boolean. """ - def __add__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + def __add__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ Add `other` dataframe or scalar to this dataframe. @@ -458,7 +458,7 @@ def __add__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: """ ... - def __sub__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + def __sub__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ Subtract `other` dataframe or scalar from this dataframe. @@ -475,7 +475,7 @@ def __sub__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: """ ... - def __mul__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + def __mul__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ Multiply `other` dataframe or scalar with this dataframe. @@ -492,7 +492,7 @@ def __mul__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: """ ... - def __truediv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + def __truediv__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ Divide this dataframe by `other` dataframe or scalar. True division, returns floats. @@ -509,7 +509,7 @@ def __truediv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: """ ... - def __floordiv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + def __floordiv__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. @@ -526,7 +526,7 @@ def __floordiv__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: """ ... - def __pow__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + def __pow__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ Raise this dataframe to the power of `other`. @@ -543,7 +543,7 @@ def __pow__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: """ ... - def __mod__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: + def __mod__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ Return modulus of this dataframe by `other` (`%` operator). @@ -560,7 +560,7 @@ def __mod__(self, other: DataFrame[Any] | Scalar[Any]) -> DataFrame[Any]: """ ... - def __divmod__(self, other: DataFrame[Any] | Scalar[Any]) -> tuple[DataFrame[Any], DataFrame[Any]]: + def __divmod__(self, other: DataFrame | Scalar[Any]) -> tuple[DataFrame, DataFrame]: """ Return quotient and remainder of integer division. See `divmod` builtin function. 
@@ -577,7 +577,7 @@ def __divmod__(self, other: DataFrame[Any] | Scalar[Any]) -> tuple[DataFrame[Any """ ... - def __invert__(self: DataFrame[Bool]) -> DataFrame[Bool]: + def __invert__(self: DataFrame) -> DataFrame: """ Invert truthiness of (boolean) elements. @@ -600,7 +600,7 @@ def __iter__(self) -> NoReturn: """ raise NotImplementedError("'__iter__' is intentionally not implemented.") - def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + def any(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. @@ -611,7 +611,7 @@ def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: """ ... - def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + def all(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. @@ -650,55 +650,55 @@ def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: """ ... - def min(self, *, skip_nulls: bool = True) -> DataFrame[DTypeT]: + def min(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def max(self, *, skip_nulls: bool = True) -> DataFrame[DTypeT]: + def max(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def sum(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def prod(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def median(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def mean(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def std(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def var(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. """ ... - def is_null(self) -> DataFrame[Bool]: + def is_null(self) -> DataFrame: """ Check for 'missing' or 'null' entries. @@ -718,7 +718,7 @@ def is_null(self) -> DataFrame[Bool]: """ ... - def is_nan(self) -> DataFrame[Bool]: + def is_nan(self) -> DataFrame: """ Check for nan entries. @@ -738,7 +738,7 @@ def is_nan(self) -> DataFrame[Bool]: """ ... - def fill_nan(self, value: float | 'null', /) -> DataFrame[DTypeT]: + def fill_nan(self, value: float | 'null', /) -> DataFrame: """ Fill ``nan`` values with the given fill value. diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index eb06f27d..43ffa9b1 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -22,35 +22,35 @@ class GroupBy: **Methods** """ - def any(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + def any(self, *, skip_nulls: bool = True) -> DataFrame: ... - def all(self, *, skip_nulls: bool = True) -> DataFrame[Bool]: + def all(self, *, skip_nulls: bool = True) -> DataFrame: ... - def min(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def min(self, *, skip_nulls: bool = True) -> DataFrame: ... 
- def max(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def max(self, *, skip_nulls: bool = True) -> DataFrame: ... - def sum(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def sum(self, *, skip_nulls: bool = True) -> DataFrame: ... - def prod(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def prod(self, *, skip_nulls: bool = True) -> DataFrame: ... - def median(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def median(self, *, skip_nulls: bool = True) -> DataFrame: ... - def mean(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def mean(self, *, skip_nulls: bool = True) -> DataFrame: ... - def std(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def std(self, *, skip_nulls: bool = True) -> DataFrame: ... - def var(self, *, skip_nulls: bool = True) -> DataFrame[Any]: + def var(self, *, skip_nulls: bool = True) -> DataFrame: ... - def size(self) -> DataFrame[IntDType]: + def size(self) -> DataFrame: ... From b67398e79fedf506a0f68c97eba421c086e26267 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 28 Jun 2023 14:21:17 +0100 Subject: [PATCH 23/38] revert making Scalar generic --- .../dataframe_api/__init__.py | 6 +- .../dataframe_api/column_object.py | 56 +++++++++---------- .../dataframe_api/dataframe_object.py | 32 +++++------ 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 2de0f599..5a65b756 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -64,7 +64,7 @@ def concat(dataframes: Sequence[DataFrame]) -> DataFrame: """ ... -def column_from_sequence(sequence: Sequence[Scalar[DType]], *, dtype: DType) -> Column[DTypeT]: +def column_from_sequence(sequence: Sequence[Scalar], *, dtype: DType) -> Column[DTypeT]: """ Construct Column from sequence of elements. @@ -128,7 +128,7 @@ class null: """ ... -def is_null(value: object, /) -> Scalar[Bool]: +def is_null(value: object, /) -> bool: """ Check if an object is a `null` scalar. @@ -195,5 +195,5 @@ class Bool(DType): # Scalar # ########## -class Scalar(Generic[DTypeT]): +class Scalar: ... diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index b24ef339..d37032cf 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -90,7 +90,7 @@ def get_rows(self, indices: Column[IntDType]) -> Column[DTypeT]: """ ... - def get_value(self, row_number: Scalar[IntDType]) -> Scalar[DTypeT]: + def get_value(self, row_number: int) -> Scalar: """ Select the value at a row number, similar to `ndarray.__getitem__()`. @@ -122,7 +122,7 @@ def sorted_indices( Parameters ---------- - ascending : Scalar[Bool] + ascending : bool If `True`, sort in ascending order. If `False`, sort in descending order. nulls_position : ``{'first', 'last'}`` @@ -137,7 +137,7 @@ def sorted_indices( """ ... - def __eq__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: # type: ignore[override] """ Compare for equality. 
@@ -155,7 +155,7 @@ def __eq__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: # typ Column """ - def __ne__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: # type: ignore[override] + def __ne__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: # type: ignore[override] """ Compare for non-equality. @@ -173,7 +173,7 @@ def __ne__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: # typ Column """ - def __ge__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: + def __ge__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: """ Compare for "greater than or equal to" `other`. @@ -189,7 +189,7 @@ def __ge__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: Column """ - def __gt__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: + def __gt__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: """ Compare for "greater than" `other`. @@ -205,7 +205,7 @@ def __gt__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: Column """ - def __le__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: + def __le__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: """ Compare for "less than or equal to" `other`. @@ -221,7 +221,7 @@ def __le__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: Column """ - def __lt__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: + def __lt__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: """ Compare for "less than" `other`. @@ -237,7 +237,7 @@ def __lt__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[Bool]: Column """ - def __and__(self: Column[Bool], other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: + def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: """ Apply logical 'and' to `other` Column (or scalar) and this Column. @@ -258,7 +258,7 @@ def __and__(self: Column[Bool], other: Column[Bool] | Scalar[Bool]) -> Column[Bo If `self` or `other` is not boolean. """ - def __or__(self: Column[Bool], other: Column[Bool] | Scalar[Bool]) -> Column[Bool]: + def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: """ Apply logical 'or' to `other` Column (or scalar) and this column. @@ -279,7 +279,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | Scalar[Bool]) -> Column[Boo If `self` or `other` is not boolean. """ - def __add__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[DTypeT]: + def __add__(self, other: Column[DTypeT] | Scalar) -> Column[DTypeT]: """ Add `other` column or scalar to this column. @@ -295,7 +295,7 @@ def __add__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[DTypeT]: Column """ - def __sub__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[DTypeT]: + def __sub__(self, other: Column[DTypeT] | Scalar) -> Column[DTypeT]: """ Subtract `other` column or scalar from this column. @@ -311,7 +311,7 @@ def __sub__(self, other: Column[DTypeT] | Scalar[DTypeT]) -> Column[DTypeT]: Column """ - def __mul__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Multiply `other` column or scalar with this column. @@ -327,7 +327,7 @@ def __mul__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[FloatDType]: + def __truediv__(self, other: Column[Any] | Scalar) -> Column[FloatDType]: """ Divide this column by `other` column or scalar. True division, returns floats. 
@@ -343,7 +343,7 @@ def __truediv__(self, other: Column[Any] | Scalar[Any]) -> Column[FloatDType]: Column """ - def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[IntDType]: + def __floordiv__(self, other: Column[Any] | Scalar) -> Column[IntDType]: """ Floor-divide `other` column or scalar to this column. @@ -359,7 +359,7 @@ def __floordiv__(self, other: Column[Any] | Scalar[Any]) -> Column[IntDType]: Column """ - def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Raise this column to the power of `other`. @@ -375,7 +375,7 @@ def __pow__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __mod__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: + def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Returns modulus of this column by `other` (`%` operator). @@ -391,7 +391,7 @@ def __mod__(self, other: Column[Any] | Scalar[Any]) -> Column[Any]: Column """ - def __divmod__(self, other: Column[Any] | Scalar[Any]) -> tuple[Column[IntDType], Column[FloatDType]]: + def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[IntDType], Column[FloatDType]]: """ Return quotient and remainder of integer division. See `divmod` builtin function. @@ -417,7 +417,7 @@ def __invert__(self: Column[Bool]) -> Column[Bool]: If any of the Column's columns is not boolean. """ - def any(self: Column[Bool], *, skip_nulls: bool = True) -> Scalar[Bool]: + def any(self: Column[Bool], *, skip_nulls: bool = True) -> bool: """ Reduction returns a bool. @@ -427,7 +427,7 @@ def any(self: Column[Bool], *, skip_nulls: bool = True) -> Scalar[Bool]: If column is not boolean. """ - def all(self: Column[Bool], *, skip_nulls: bool = True) -> Scalar[Bool]: + def all(self: Column[Bool], *, skip_nulls: bool = True) -> bool: """ Reduction returns a bool. @@ -437,32 +437,32 @@ def all(self: Column[Bool], *, skip_nulls: bool = True) -> Scalar[Bool]: If column is not boolean. """ - def min(self, *, skip_nulls: bool = True) -> Scalar[DTypeT]: + def min(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def max(self, *, skip_nulls: bool = True) -> Scalar[DTypeT]: + def max(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def sum(self, *, skip_nulls: bool = True) -> Scalar[DTypeT]: + def sum(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def prod(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def prod(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical data types. The returned value has the same dtype as the column. """ - def median(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def median(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -470,7 +470,7 @@ def median(self, *, skip_nulls: bool = True) -> Scalar[Any]: dtypes. 
""" - def mean(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def mean(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -478,7 +478,7 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar[Any]: dtypes. """ - def std(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def std(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -486,7 +486,7 @@ def std(self, *, skip_nulls: bool = True) -> Scalar[Any]: dtypes. """ - def var(self, *, skip_nulls: bool = True) -> Scalar[Any]: + def var(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 51c68fc7..5627cd63 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -293,7 +293,7 @@ def sorted_indices( """ ... - def __eq__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: # type: ignore[override] + def __eq__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[override] """ Compare for equality. @@ -312,7 +312,7 @@ def __eq__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: # type: ignor """ ... - def __ne__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: # type: ignore[override] + def __ne__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[override] """ Compare for non-equality. @@ -331,7 +331,7 @@ def __ne__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: # type: ignor """ ... - def __ge__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: + def __ge__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "greater than or equal to" `other`. @@ -348,7 +348,7 @@ def __ge__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: """ ... - def __gt__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: + def __gt__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "greater than" `other`. @@ -365,7 +365,7 @@ def __gt__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: """ ... - def __le__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: + def __le__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "less than or equal to" `other`. @@ -382,7 +382,7 @@ def __le__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: """ ... - def __lt__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: + def __lt__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "less than" `other`. @@ -399,7 +399,7 @@ def __lt__(self, other: DataFrame | Scalar[DTypeT]) -> DataFrame: """ ... - def __and__(self, other: DataFrame | Scalar[Bool]) -> DataFrame: + def __and__(self, other: DataFrame | bool) -> DataFrame: """ Apply logical 'and' to `other` DataFrame (or scalar) and this dataframe. @@ -420,7 +420,7 @@ def __and__(self, other: DataFrame | Scalar[Bool]) -> DataFrame: If `self` or `other` is not boolean. """ - def __or__(self, other: DataFrame | Scalar[Bool]) -> DataFrame: + def __or__(self, other: DataFrame | bool) -> DataFrame: """ Apply logical 'or' to `other` DataFrame (or scalar) and this DataFrame. 
@@ -441,7 +441,7 @@ def __or__(self, other: DataFrame | Scalar[Bool]) -> DataFrame: If `self` or `other` is not boolean. """ - def __add__(self, other: DataFrame | Scalar[Any]) -> DataFrame: + def __add__(self, other: DataFrame | Scalar) -> DataFrame: """ Add `other` dataframe or scalar to this dataframe. @@ -458,7 +458,7 @@ def __add__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ ... - def __sub__(self, other: DataFrame | Scalar[Any]) -> DataFrame: + def __sub__(self, other: DataFrame | Scalar) -> DataFrame: """ Subtract `other` dataframe or scalar from this dataframe. @@ -475,7 +475,7 @@ def __sub__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ ... - def __mul__(self, other: DataFrame | Scalar[Any]) -> DataFrame: + def __mul__(self, other: DataFrame | Scalar) -> DataFrame: """ Multiply `other` dataframe or scalar with this dataframe. @@ -492,7 +492,7 @@ def __mul__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ ... - def __truediv__(self, other: DataFrame | Scalar[Any]) -> DataFrame: + def __truediv__(self, other: DataFrame | Scalar) -> DataFrame: """ Divide this dataframe by `other` dataframe or scalar. True division, returns floats. @@ -509,7 +509,7 @@ def __truediv__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ ... - def __floordiv__(self, other: DataFrame | Scalar[Any]) -> DataFrame: + def __floordiv__(self, other: DataFrame | Scalar) -> DataFrame: """ Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. @@ -526,7 +526,7 @@ def __floordiv__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ ... - def __pow__(self, other: DataFrame | Scalar[Any]) -> DataFrame: + def __pow__(self, other: DataFrame | Scalar) -> DataFrame: """ Raise this dataframe to the power of `other`. @@ -543,7 +543,7 @@ def __pow__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ ... - def __mod__(self, other: DataFrame | Scalar[Any]) -> DataFrame: + def __mod__(self, other: DataFrame | Scalar) -> DataFrame: """ Return modulus of this dataframe by `other` (`%` operator). @@ -560,7 +560,7 @@ def __mod__(self, other: DataFrame | Scalar[Any]) -> DataFrame: """ ... - def __divmod__(self, other: DataFrame | Scalar[Any]) -> tuple[DataFrame, DataFrame]: + def __divmod__(self, other: DataFrame | Scalar) -> tuple[DataFrame, DataFrame]: """ Return quotient and remainder of integer division. See `divmod` builtin function. 
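The signatures in the patch above accept either another column/dataframe or a plain Python scalar. A minimal sketch of that intent follows; `ToyColumn` is an illustrative stand-in, not part of the standard.

```python
# Illustrative sketch only: ``ToyColumn`` stands in for an implementation's Column.
# It shows what the ``Column | Scalar`` / ``DataFrame | Scalar`` signatures above
# mean for callers: ``other`` may be another column of equal length, or a plain
# Python scalar that is broadcast elementwise.
from __future__ import annotations
from typing import Any, Sequence

Scalar = Any  # a plain Python scalar (bool, int, float, ...)

class ToyColumn:
    def __init__(self, data: Sequence[Any]) -> None:
        self._data = list(data)

    def __gt__(self, other: ToyColumn | Scalar) -> ToyColumn:
        rhs = other._data if isinstance(other, ToyColumn) else [other] * len(self._data)
        return ToyColumn([a > b for a, b in zip(self._data, rhs)])

mask = ToyColumn([1, 5, 3]) > 2          # the scalar 2 is broadcast elementwise
assert mask._data == [False, True, True]
```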
From f28ddcd496f2a2af568b6ef94aa841f5dcfa7690 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 08:42:18 +0100 Subject: [PATCH 24/38] remove DType class --- .../dataframe_api/__init__.py | 37 +++++++---------- .../API_specification/dataframe_api/_types.py | 4 +- .../dataframe_api/column_object.py | 40 +++++++++---------- .../dataframe_api/dataframe_object.py | 10 ++--- .../dataframe_api/groupby_object.py | 2 - 5 files changed, 40 insertions(+), 53 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 5a65b756..24fd0d69 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -8,7 +8,7 @@ from .column_object import * from .dataframe_object import DataFrame from .groupby_object import * -from ._types import DTypeT +from ._types import DType __all__ = [ "DataFrame", @@ -31,7 +31,7 @@ "Float64", "Float32", "Bool", - "DTypeT", + "DType", "FloatDType", "IntDType", ] @@ -64,7 +64,7 @@ def concat(dataframes: Sequence[DataFrame]) -> DataFrame: """ ... -def column_from_sequence(sequence: Sequence[Scalar], *, dtype: DType) -> Column[DTypeT]: +def column_from_sequence(sequence: Sequence[Scalar], *, dtype: Any) -> Column[Any]: """ Construct Column from sequence of elements. @@ -149,46 +149,37 @@ def is_null(value: object, /) -> bool: # Dtypes # ########## -class DType: - """Base class for all dtypes.""" - -class IntDType(DType): - """Base class for all integer dtypes.""" - -class FloatDType(DType): - """Base class for all float dtypes.""" - -class Int64(IntDType): +class Int64: """Integer type with 64 bits of precision.""" -class Int32(IntDType): +class Int32: """Integer type with 32 bits of precision.""" -class Int16(IntDType): +class Int16: """Integer type with 16 bits of precision.""" -class Int8(IntDType): +class Int8: """Integer type with 8 bits of precision.""" -class UInt64(IntDType): +class UInt64: """Unsigned integer type with 64 bits of precision.""" -class UInt32(IntDType): +class UInt32: """Unsigned integer type with 32 bits of precision.""" -class UInt16(IntDType): +class UInt16: """Unsigned integer type with 16 bits of precision.""" -class UInt8(IntDType): +class UInt8: """Unsigned integer type with 8 bits of precision.""" -class Float64(FloatDType): +class Float64: """Floating point type with 64 bits of precision.""" -class Float32(FloatDType): +class Float32: """Floating point type with 32 bits of precision.""" -class Bool(DType): +class Bool: """Boolean type with 8 bits of precision.""" ########## diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index f01e6163..320e1c2c 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -21,13 +21,11 @@ ) from enum import Enum -if TYPE_CHECKING: - from . 
import DType array = TypeVar("array") Scalar = TypeVar("Scalar") device = TypeVar("device") -DTypeT = TypeVar("DTypeT", bound="DType") +DType = TypeVar("DType") SupportsDLPack = TypeVar("SupportsDLPack") SupportsBufferProtocol = TypeVar("SupportsBufferProtocol") PyCapsule = TypeVar("PyCapsule") diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index d37032cf..d81fab62 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -3,14 +3,14 @@ from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal, Generic, TypeVar if TYPE_CHECKING: - from . import DType, IntDType, FloatDType, Bool, null, Scalar + from . import Bool, null, Scalar + from ._types import DType -from ._types import DTypeT __all__ = ['Column'] -class Column(Generic[DTypeT]): +class Column(Generic[DType]): """ Column object @@ -74,12 +74,12 @@ def __iter__(self) -> NoReturn: raise NotImplementedError("'__iter__' is intentionally not implemented.") @property - def dtype(self) -> DType: + def dtype(self) -> Any: """ Return data type of column. """ - def get_rows(self, indices: Column[IntDType]) -> Column[DTypeT]: + def get_rows(self: Column[DType], indices: Column[Any]) -> Column[DType]: """ Select a subset of rows, similar to `ndarray.take`. @@ -112,7 +112,7 @@ def sorted_indices( *, ascending: bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[IntDType]: + ) -> Column[Any]: """ Return row numbers which would sort column. @@ -137,7 +137,7 @@ def sorted_indices( """ ... - def __eq__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore[override] """ Compare for equality. @@ -155,7 +155,7 @@ def __eq__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: # type: ignor Column """ - def __ne__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: # type: ignore[override] + def __ne__(self, other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore[override] """ Compare for non-equality. @@ -173,7 +173,7 @@ def __ne__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: # type: ignor Column """ - def __ge__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: + def __ge__(self, other: Column[DType] | Scalar) -> Column[Bool]: """ Compare for "greater than or equal to" `other`. @@ -189,7 +189,7 @@ def __ge__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: Column """ - def __gt__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: + def __gt__(self, other: Column[DType] | Scalar) -> Column[Bool]: """ Compare for "greater than" `other`. @@ -205,7 +205,7 @@ def __gt__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: Column """ - def __le__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: + def __le__(self, other: Column[DType] | Scalar) -> Column[Bool]: """ Compare for "less than or equal to" `other`. @@ -221,7 +221,7 @@ def __le__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: Column """ - def __lt__(self, other: Column[DTypeT] | Scalar) -> Column[Bool]: + def __lt__(self, other: Column[DType] | Scalar) -> Column[Bool]: """ Compare for "less than" `other`. @@ -279,7 +279,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. 
""" - def __add__(self, other: Column[DTypeT] | Scalar) -> Column[DTypeT]: + def __add__(self, other: Column[DType] | Scalar) -> Column[DType]: """ Add `other` column or scalar to this column. @@ -295,7 +295,7 @@ def __add__(self, other: Column[DTypeT] | Scalar) -> Column[DTypeT]: Column """ - def __sub__(self, other: Column[DTypeT] | Scalar) -> Column[DTypeT]: + def __sub__(self, other: Column[DType] | Scalar) -> Column[DType]: """ Subtract `other` column or scalar from this column. @@ -327,7 +327,7 @@ def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __truediv__(self, other: Column[Any] | Scalar) -> Column[FloatDType]: + def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Divide this column by `other` column or scalar. True division, returns floats. @@ -343,7 +343,7 @@ def __truediv__(self, other: Column[Any] | Scalar) -> Column[FloatDType]: Column """ - def __floordiv__(self, other: Column[Any] | Scalar) -> Column[IntDType]: + def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Floor-divide `other` column or scalar to this column. @@ -391,7 +391,7 @@ def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[IntDType], Column[FloatDType]]: + def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[Any], Column[Any]]: """ Return quotient and remainder of integer division. See `divmod` builtin function. @@ -532,7 +532,7 @@ def is_nan(self) -> Column[Bool]: In particular, does not check for `np.timedelta64('NaT')`. """ - def is_in(self, values: Column[DTypeT]) -> Column[Bool]: + def is_in(self, values: Column[DType]) -> Column[Bool]: """ Indicate whether the value at each row matches any value in `values`. @@ -550,7 +550,7 @@ def is_in(self, values: Column[DTypeT]) -> Column[Bool]: Column[bool] """ - def unique_indices(self, *, skip_nulls: bool = True) -> Column[IntDType]: + def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: """ Return indices corresponding to unique values in Column. @@ -571,7 +571,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[IntDType]: """ ... - def fill_nan(self, value: float | 'null', /) -> Column[DTypeT]: + def fill_nan(self, value: float | 'null', /) -> Column[DType]: """ Fill floating point ``nan`` values with the given fill value. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 5627cd63..1c6dd9ec 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -2,12 +2,12 @@ from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn, TypeVar, Generic -from ._types import DTypeT +from ._types import DType if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy - from . import DType, IntDType, FloatDType, Bool, null, Scalar + from . import Bool, null, Scalar @@ -104,7 +104,7 @@ def groupby(self, keys: Sequence[str], /) -> GroupBy: """ ... - def get_column_by_name(self, name: str, /) -> Column[DTypeT]: + def get_column_by_name(self, name: str, /) -> Column[DType]: """ Select a column by name. @@ -142,7 +142,7 @@ def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame: """ ... - def get_rows(self, indices: Column[IntDType]) -> DataFrame: + def get_rows(self, indices: Column[Any]) -> DataFrame: """ Select a subset of rows, similar to `ndarray.take`. 
@@ -258,7 +258,7 @@ def sorted_indices( *, ascending: Sequence[bool] | bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[IntDType]: + ) -> Column[Any]: """ Return row numbers which would sort according to given columns. diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index 43ffa9b1..096c4e11 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -4,12 +4,10 @@ if TYPE_CHECKING: from .dataframe_object import DataFrame - from . import IntDType, DType, Bool __all__ = ['GroupBy'] -T = TypeVar('T', bound="DType") class GroupBy: From 06f8aa8e41b874bf9e15e6568e0f107802c950d7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 08:44:33 +0100 Subject: [PATCH 25/38] fixup --- spec/API_specification/dataframe_api/__init__.py | 5 ----- spec/API_specification/dataframe_api/_types.py | 1 - 2 files changed, 6 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 24fd0d69..9dbaf30a 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -19,7 +19,6 @@ "dataframe_from_dict", "is_null", "null", - "DType", "Int64", "Int32", "Int16", @@ -30,10 +29,6 @@ "UInt8", "Float64", "Float32", - "Bool", - "DType", - "FloatDType", - "IntDType", ] diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 320e1c2c..0d90c75e 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -23,7 +23,6 @@ array = TypeVar("array") -Scalar = TypeVar("Scalar") device = TypeVar("device") DType = TypeVar("DType") SupportsDLPack = TypeVar("SupportsDLPack") From 7a26f3a712c25ca28b131d2b58f3b2ba68d4b765 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 08:51:54 +0100 Subject: [PATCH 26/38] dont export Scalar, replace it with Any --- .../dataframe_api/__init__.py | 9 +--- .../API_specification/dataframe_api/_types.py | 2 +- .../dataframe_api/column_object.py | 48 +++++++++---------- .../dataframe_api/dataframe_object.py | 30 ++++++------ spec/design_topics/python_builtin_types.md | 2 +- 5 files changed, 42 insertions(+), 49 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 9dbaf30a..08015abb 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -59,7 +59,7 @@ def concat(dataframes: Sequence[DataFrame]) -> DataFrame: """ ... -def column_from_sequence(sequence: Sequence[Scalar], *, dtype: Any) -> Column[Any]: +def column_from_sequence(sequence: Sequence[Any], *, dtype: Any) -> Column[Any]: """ Construct Column from sequence of elements. @@ -176,10 +176,3 @@ class Float32: class Bool: """Boolean type with 8 bits of precision.""" - -########## -# Scalar # -########## - -class Scalar: - ... 
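With the placeholder `Scalar` class dropped from the top-level namespace, `column_from_sequence` above is annotated against plain Python objects. A toy sketch of that intent follows; `ToyInt64` and `toy_column_from_sequence` are hypothetical stand-ins, and whether implementations take a dtype instance or the dtype class itself is not settled here.

```python
# Hypothetical sketch of the ``column_from_sequence(sequence, *, dtype)`` intent:
# the sequence holds plain Python scalars, and ``dtype`` is whatever dtype object
# the implementation exposes (annotated only as ``Any``).
from typing import Any, List, Sequence

class ToyInt64:
    """Stand-in for an implementation's Int64 dtype object."""

def toy_column_from_sequence(sequence: Sequence[Any], *, dtype: Any) -> List[int]:
    # A real implementation returns its own Column type; a list suffices here.
    return [int(x) for x in sequence]

col = toy_column_from_sequence([1, 2, 3], dtype=ToyInt64())
assert col == [1, 2, 3]
```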
diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 0d90c75e..030d920c 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -21,8 +21,8 @@ ) from enum import Enum - array = TypeVar("array") +Scalar = TypeVar("Scalar") device = TypeVar("device") DType = TypeVar("DType") SupportsDLPack = TypeVar("SupportsDLPack") diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index d81fab62..9da43f74 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -3,7 +3,7 @@ from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal, Generic, TypeVar if TYPE_CHECKING: - from . import Bool, null, Scalar + from . import Bool, null from ._types import DType @@ -90,7 +90,7 @@ def get_rows(self: Column[DType], indices: Column[Any]) -> Column[DType]: """ ... - def get_value(self, row_number: int) -> Scalar: + def get_value(self, row_number: int) -> Any: """ Select the value at a row number, similar to `ndarray.__getitem__()`. @@ -137,7 +137,7 @@ def sorted_indices( """ ... - def __eq__(self, other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Column[DType] | Any) -> Column[Bool]: # type: ignore[override] """ Compare for equality. @@ -155,7 +155,7 @@ def __eq__(self, other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore Column """ - def __ne__(self, other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore[override] + def __ne__(self, other: Column[DType] | Any) -> Column[Bool]: # type: ignore[override] """ Compare for non-equality. @@ -173,7 +173,7 @@ def __ne__(self, other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore Column """ - def __ge__(self, other: Column[DType] | Scalar) -> Column[Bool]: + def __ge__(self, other: Column[DType] | Any) -> Column[Bool]: """ Compare for "greater than or equal to" `other`. @@ -189,7 +189,7 @@ def __ge__(self, other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __gt__(self, other: Column[DType] | Scalar) -> Column[Bool]: + def __gt__(self, other: Column[DType] | Any) -> Column[Bool]: """ Compare for "greater than" `other`. @@ -205,7 +205,7 @@ def __gt__(self, other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __le__(self, other: Column[DType] | Scalar) -> Column[Bool]: + def __le__(self, other: Column[DType] | Any) -> Column[Bool]: """ Compare for "less than or equal to" `other`. @@ -221,7 +221,7 @@ def __le__(self, other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __lt__(self, other: Column[DType] | Scalar) -> Column[Bool]: + def __lt__(self, other: Column[DType] | Any) -> Column[Bool]: """ Compare for "less than" `other`. @@ -279,7 +279,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self, other: Column[DType] | Scalar) -> Column[DType]: + def __add__(self, other: Column[DType] | Any) -> Column[DType]: """ Add `other` column or scalar to this column. @@ -295,7 +295,7 @@ def __add__(self, other: Column[DType] | Scalar) -> Column[DType]: Column """ - def __sub__(self, other: Column[DType] | Scalar) -> Column[DType]: + def __sub__(self, other: Column[DType] | Any) -> Column[DType]: """ Subtract `other` column or scalar from this column. 
@@ -311,7 +311,7 @@ def __sub__(self, other: Column[DType] | Scalar) -> Column[DType]: Column """ - def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __mul__(self, other: Column[Any] | Any) -> Column[Any]: """ Multiply `other` column or scalar with this column. @@ -327,7 +327,7 @@ def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __truediv__(self, other: Column[Any] | Any) -> Column[Any]: """ Divide this column by `other` column or scalar. True division, returns floats. @@ -343,7 +343,7 @@ def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __floordiv__(self, other: Column[Any] | Any) -> Column[Any]: """ Floor-divide `other` column or scalar to this column. @@ -359,7 +359,7 @@ def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __pow__(self, other: Column[Any] | Any) -> Column[Any]: """ Raise this column to the power of `other`. @@ -375,7 +375,7 @@ def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __mod__(self, other: Column[Any] | Any) -> Column[Any]: """ Returns modulus of this column by `other` (`%` operator). @@ -391,7 +391,7 @@ def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[Any], Column[Any]]: + def __divmod__(self, other: Column[Any] | Any) -> tuple[Column[Any], Column[Any]]: """ Return quotient and remainder of integer division. See `divmod` builtin function. @@ -437,32 +437,32 @@ def all(self: Column[Bool], *, skip_nulls: bool = True) -> bool: If column is not boolean. """ - def min(self, *, skip_nulls: bool = True) -> Scalar: + def min(self, *, skip_nulls: bool = True) -> Any: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def max(self, *, skip_nulls: bool = True) -> Scalar: + def max(self, *, skip_nulls: bool = True) -> Any: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def sum(self, *, skip_nulls: bool = True) -> Scalar: + def sum(self, *, skip_nulls: bool = True) -> Any: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def prod(self, *, skip_nulls: bool = True) -> Scalar: + def prod(self, *, skip_nulls: bool = True) -> Any: """ Reduction returns a scalar. Must be supported for numerical data types. The returned value has the same dtype as the column. """ - def median(self, *, skip_nulls: bool = True) -> Scalar: + def median(self, *, skip_nulls: bool = True) -> Any: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -470,7 +470,7 @@ def median(self, *, skip_nulls: bool = True) -> Scalar: dtypes. """ - def mean(self, *, skip_nulls: bool = True) -> Scalar: + def mean(self, *, skip_nulls: bool = True) -> Any: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. 
Returns a float for numerical data types, and @@ -478,7 +478,7 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar: dtypes. """ - def std(self, *, skip_nulls: bool = True) -> Scalar: + def std(self, *, skip_nulls: bool = True) -> Any: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -486,7 +486,7 @@ def std(self, *, skip_nulls: bool = True) -> Scalar: dtypes. """ - def var(self, *, skip_nulls: bool = True) -> Scalar: + def var(self, *, skip_nulls: bool = True) -> Any: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 1c6dd9ec..04131ff8 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy - from . import Bool, null, Scalar + from . import Bool, null @@ -293,7 +293,7 @@ def sorted_indices( """ ... - def __eq__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[override] + def __eq__(self, other: DataFrame | Any) -> DataFrame: # type: ignore[override] """ Compare for equality. @@ -312,7 +312,7 @@ def __eq__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[overri """ ... - def __ne__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[override] + def __ne__(self, other: DataFrame | Any) -> DataFrame: # type: ignore[override] """ Compare for non-equality. @@ -331,7 +331,7 @@ def __ne__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[overri """ ... - def __ge__(self, other: DataFrame | Scalar) -> DataFrame: + def __ge__(self, other: DataFrame | Any) -> DataFrame: """ Compare for "greater than or equal to" `other`. @@ -348,7 +348,7 @@ def __ge__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __gt__(self, other: DataFrame | Scalar) -> DataFrame: + def __gt__(self, other: DataFrame | Any) -> DataFrame: """ Compare for "greater than" `other`. @@ -365,7 +365,7 @@ def __gt__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __le__(self, other: DataFrame | Scalar) -> DataFrame: + def __le__(self, other: DataFrame | Any) -> DataFrame: """ Compare for "less than or equal to" `other`. @@ -382,7 +382,7 @@ def __le__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __lt__(self, other: DataFrame | Scalar) -> DataFrame: + def __lt__(self, other: DataFrame | Any) -> DataFrame: """ Compare for "less than" `other`. @@ -441,7 +441,7 @@ def __or__(self, other: DataFrame | bool) -> DataFrame: If `self` or `other` is not boolean. """ - def __add__(self, other: DataFrame | Scalar) -> DataFrame: + def __add__(self, other: DataFrame | Any) -> DataFrame: """ Add `other` dataframe or scalar to this dataframe. @@ -458,7 +458,7 @@ def __add__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __sub__(self, other: DataFrame | Scalar) -> DataFrame: + def __sub__(self, other: DataFrame | Any) -> DataFrame: """ Subtract `other` dataframe or scalar from this dataframe. @@ -475,7 +475,7 @@ def __sub__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __mul__(self, other: DataFrame | Scalar) -> DataFrame: + def __mul__(self, other: DataFrame | Any) -> DataFrame: """ Multiply `other` dataframe or scalar with this dataframe. 
@@ -492,7 +492,7 @@ def __mul__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __truediv__(self, other: DataFrame | Scalar) -> DataFrame: + def __truediv__(self, other: DataFrame | Any) -> DataFrame: """ Divide this dataframe by `other` dataframe or scalar. True division, returns floats. @@ -509,7 +509,7 @@ def __truediv__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __floordiv__(self, other: DataFrame | Scalar) -> DataFrame: + def __floordiv__(self, other: DataFrame | Any) -> DataFrame: """ Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. @@ -526,7 +526,7 @@ def __floordiv__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __pow__(self, other: DataFrame | Scalar) -> DataFrame: + def __pow__(self, other: DataFrame | Any) -> DataFrame: """ Raise this dataframe to the power of `other`. @@ -543,7 +543,7 @@ def __pow__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __mod__(self, other: DataFrame | Scalar) -> DataFrame: + def __mod__(self, other: DataFrame | Any) -> DataFrame: """ Return modulus of this dataframe by `other` (`%` operator). @@ -560,7 +560,7 @@ def __mod__(self, other: DataFrame | Scalar) -> DataFrame: """ ... - def __divmod__(self, other: DataFrame | Scalar) -> tuple[DataFrame, DataFrame]: + def __divmod__(self, other: DataFrame | Any) -> tuple[DataFrame, DataFrame]: """ Return quotient and remainder of integer division. See `divmod` builtin function. diff --git a/spec/design_topics/python_builtin_types.md b/spec/design_topics/python_builtin_types.md index 567baca3..1c78dd9a 100644 --- a/spec/design_topics/python_builtin_types.md +++ b/spec/design_topics/python_builtin_types.md @@ -12,7 +12,7 @@ the `float` it is documented to return, in combination with the `__gt__` method ```python class DataFrame: - def __gt__(self, other: DataFrame | Scalar) -> DataFrame: + def __gt__(self, other: DataFrame | Any) -> DataFrame: ... def get_column_by_name(self, name: str, /) -> Column: ... From 56c62935c953c5928d22fd05ccc7d00431a0eb7c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 08:53:31 +0100 Subject: [PATCH 27/38] make self: Column[DType] explicit --- .../dataframe_api/column_object.py | 20 +++++++++---------- .../dataframe_api/dataframe_object.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 9da43f74..5231bcec 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -137,7 +137,7 @@ def sorted_indices( """ ... - def __eq__(self, other: Column[DType] | Any) -> Column[Bool]: # type: ignore[override] + def __eq__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: # type: ignore[override] """ Compare for equality. @@ -155,7 +155,7 @@ def __eq__(self, other: Column[DType] | Any) -> Column[Bool]: # type: ignore[ov Column """ - def __ne__(self, other: Column[DType] | Any) -> Column[Bool]: # type: ignore[override] + def __ne__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: # type: ignore[override] """ Compare for non-equality. 
@@ -173,7 +173,7 @@ def __ne__(self, other: Column[DType] | Any) -> Column[Bool]: # type: ignore[ov Column """ - def __ge__(self, other: Column[DType] | Any) -> Column[Bool]: + def __ge__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: """ Compare for "greater than or equal to" `other`. @@ -189,7 +189,7 @@ def __ge__(self, other: Column[DType] | Any) -> Column[Bool]: Column """ - def __gt__(self, other: Column[DType] | Any) -> Column[Bool]: + def __gt__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: """ Compare for "greater than" `other`. @@ -205,7 +205,7 @@ def __gt__(self, other: Column[DType] | Any) -> Column[Bool]: Column """ - def __le__(self, other: Column[DType] | Any) -> Column[Bool]: + def __le__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: """ Compare for "less than or equal to" `other`. @@ -221,7 +221,7 @@ def __le__(self, other: Column[DType] | Any) -> Column[Bool]: Column """ - def __lt__(self, other: Column[DType] | Any) -> Column[Bool]: + def __lt__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: """ Compare for "less than" `other`. @@ -279,7 +279,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self, other: Column[DType] | Any) -> Column[DType]: + def __add__(self: Column[DType], other: Column[DType] | Any) -> Column[DType]: """ Add `other` column or scalar to this column. @@ -295,7 +295,7 @@ def __add__(self, other: Column[DType] | Any) -> Column[DType]: Column """ - def __sub__(self, other: Column[DType] | Any) -> Column[DType]: + def __sub__(self: Column[DType], other: Column[DType] | Any) -> Column[DType]: """ Subtract `other` column or scalar from this column. @@ -532,7 +532,7 @@ def is_nan(self) -> Column[Bool]: In particular, does not check for `np.timedelta64('NaT')`. """ - def is_in(self, values: Column[DType]) -> Column[Bool]: + def is_in(self: Column[DType], values: Column[DType]) -> Column[Bool]: """ Indicate whether the value at each row matches any value in `values`. @@ -571,7 +571,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: """ ... - def fill_nan(self, value: float | 'null', /) -> Column[DType]: + def fill_nan(self: Column[DType], value: float | 'null', /) -> Column[DType]: """ Fill floating point ``nan`` values with the given fill value. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 04131ff8..ac0e0373 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -104,7 +104,7 @@ def groupby(self, keys: Sequence[str], /) -> GroupBy: """ ... - def get_column_by_name(self, name: str, /) -> Column[DType]: + def get_column_by_name(self, name: str, /) -> Column[Any]: """ Select a column by name. 
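A short sketch of what the explicit `self: Column[DType]` annotations above express: the dtype parameter of `self` is tied to the parameters of the other arguments, so a checker can flag mismatches. `ToyColumn` below is illustrative only.

```python
# Minimal, illustrative sketch (``ToyColumn`` is not the standard's Column):
# annotating ``self`` with the class's type variable ties the dtype parameter of
# ``values`` to the dtype parameter of ``self``.
from __future__ import annotations
from typing import Generic, TypeVar

DType = TypeVar("DType")

class ToyColumn(Generic[DType]):
    def is_in(self: ToyColumn[DType], values: ToyColumn[DType]) -> ToyColumn[bool]:
        # Placeholder body -- only the annotations matter for this sketch.
        return ToyColumn()

ints: ToyColumn[int] = ToyColumn()
floats: ToyColumn[float] = ToyColumn()

mask = ints.is_in(ints)     # OK: both sides share DType = int
# ints.is_in(floats)        # a type checker reports a dtype-parameter mismatch
```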
From 314ab42fb7f1626f47ab4ca974c065367e2b6167 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 08:57:24 +0100 Subject: [PATCH 28/38] preserve Column[int] in docs --- spec/API_specification/dataframe_api/column_object.py | 6 +++--- spec/API_specification/dataframe_api/dataframe_object.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 5231bcec..48659618 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -85,7 +85,7 @@ def get_rows(self: Column[DType], indices: Column[Any]) -> Column[DType]: Parameters ---------- - indices : Column[IntDType] + indices : Column[int] Positions of rows to select. """ ... @@ -133,7 +133,7 @@ def sorted_indices( Returns ------- - Column[IntDType] + Column[int] """ ... @@ -556,7 +556,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: Returns ------- - Column[IntDType] + Column[int] Indices corresponding to unique values. Notes diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index ac0e0373..18d4e8dd 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -148,7 +148,7 @@ def get_rows(self, indices: Column[Any]) -> DataFrame: Parameters ---------- - indices : Column[IntDType] + indices : Column[int] Positions of rows to select. Returns @@ -284,7 +284,7 @@ def sorted_indices( Returns ------- - Column[IntDType] + Column[int] Raises ------ From 4ddbae123beddf3af0e882df09615b991be161e4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 09:02:46 +0100 Subject: [PATCH 29/38] simplify further --- spec/API_specification/dataframe_api/column_object.py | 4 ++-- spec/conf.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 48659618..8f33de48 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -279,7 +279,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self: Column[DType], other: Column[DType] | Any) -> Column[DType]: + def __add__(self: Column[Any], other: Column[Any] | Any) -> Column[Any]: """ Add `other` column or scalar to this column. @@ -295,7 +295,7 @@ def __add__(self: Column[DType], other: Column[DType] | Any) -> Column[DType]: Column """ - def __sub__(self: Column[DType], other: Column[DType] | Any) -> Column[DType]: + def __sub__(self: Column[Any], other: Column[Any] | Any) -> Column[Any]: """ Subtract `other` column or scalar from this column. diff --git a/spec/conf.py b/spec/conf.py index da73887b..86ae0c06 100644 --- a/spec/conf.py +++ b/spec/conf.py @@ -82,10 +82,7 @@ ('py:class', 'enum.Enum'), ('py:class', 'ellipsis'), ('py:class', 'Scalar'), - ('py:class', 'DTypeT'), ('py:class', 'Bool'), - ('py:class', 'IntDType'), - ('py:class', 'FloatDType'), ] # NOTE: this alias handling isn't used yet - added in anticipation of future # need based on dataframe API aliases. 
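The docstrings above keep `Column[int]` as the description of row indices even though the annotations are now `Column[Any]`. A plain-Python illustration of the contract they describe, using lists rather than the standard's types:

```python
# Plain-Python illustration (lists, not the standard's Column) of the contract in
# the docstrings above: ``sorted_indices`` yields the integer row positions that
# would sort the column, and ``get_rows`` takes rows by position, like
# ``ndarray.take``.
data = [30, 10, 20]

sorted_idx = sorted(range(len(data)), key=data.__getitem__)   # ~ sorted_indices()
assert sorted_idx == [1, 2, 0]

taken = [data[i] for i in sorted_idx]                         # ~ get_rows(sorted_idx)
assert taken == [10, 20, 30]
```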
From ea7a81a5dc6bd6000e0e63cc00778643eeeefd83 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 09:07:32 +0100 Subject: [PATCH 30/38] reduce diff --- spec/API_specification/dataframe_api/__init__.py | 1 + spec/API_specification/dataframe_api/dataframe_object.py | 2 +- spec/API_specification/dataframe_api/groupby_object.py | 1 - spec/API_specification/index.rst | 4 ---- 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 08015abb..6e9b3e7a 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -29,6 +29,7 @@ "UInt8", "Float64", "Float32", + "Bool", ] diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 18d4e8dd..1ffc7988 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -577,7 +577,7 @@ def __divmod__(self, other: DataFrame | Any) -> tuple[DataFrame, DataFrame]: """ ... - def __invert__(self: DataFrame) -> DataFrame: + def __invert__(self) -> DataFrame: """ Invert truthiness of (boolean) elements. diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index 096c4e11..5a4d2a73 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -9,7 +9,6 @@ __all__ = ['GroupBy'] - class GroupBy: """ GroupBy object. diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst index 165266a4..b90d3320 100644 --- a/spec/API_specification/index.rst +++ b/spec/API_specification/index.rst @@ -16,7 +16,6 @@ of objects and functions in the top-level namespace. The latter are: __dataframe_api_version__ is_null null - DType Int64 Int32 Int16 @@ -28,9 +27,6 @@ of objects and functions in the top-level namespace. 
The latter are: Float64 Float32 Bool - DTypeT - IntDType - FloatDType The ``DataFrame``, ``Column`` and ``GroupBy`` objects have the following methods and attributes: From 4a740b931b382db7868146257665224004d6e4cd Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 09:14:17 +0100 Subject: [PATCH 31/38] reduce diff --- spec/API_specification/dataframe_api/_types.py | 1 - spec/API_specification/dataframe_api/dataframe_object.py | 5 +---- spec/API_specification/dataframe_api/groupby_object.py | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 030d920c..2874ba4c 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -17,7 +17,6 @@ TypeVar, Union, Protocol, - TYPE_CHECKING, ) from enum import Enum diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 1ffc7988..759f602f 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -1,8 +1,6 @@ from __future__ import annotations -from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn, TypeVar, Generic - -from ._types import DType +from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn if TYPE_CHECKING: from .column_object import Column @@ -10,7 +8,6 @@ from . import Bool, null - __all__ = ["DataFrame"] diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index 5a4d2a73..8787edf2 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TypeVar, Generic, Any +from typing import TYPE_CHECKING if TYPE_CHECKING: from .dataframe_object import DataFrame From d6a6e87fde416024a568d609f0c3d48120f9745c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 09:23:39 +0100 Subject: [PATCH 32/38] get docs building again --- spec/API_specification/dataframe_api/__init__.py | 2 +- spec/API_specification/dataframe_api/column_object.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 6e9b3e7a..9071939c 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -13,7 +13,7 @@ __all__ = [ "DataFrame", "Column", - "__dataframe_api_version", + "__dataframe_api_version__", "column_from_sequence", "concat", "dataframe_from_dict", diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 8f33de48..06c9a5b7 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -2,9 +2,10 @@ from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal, Generic, TypeVar +from ._types import DType + if TYPE_CHECKING: from . 
import Bool, null - from ._types import DType __all__ = ['Column'] From 526a5d7cc33ab4bfd1a4488de4af77a6becd44e7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 29 Jun 2023 09:35:51 +0100 Subject: [PATCH 33/38] further reduce diff --- spec/API_specification/dataframe_api/__init__.py | 4 ++-- spec/API_specification/dataframe_api/column_object.py | 2 +- spec/API_specification/dataframe_api/dataframe_object.py | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 9071939c..ed664363 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -3,7 +3,7 @@ """ from __future__ import annotations -from typing import Mapping, Sequence, Any, Generic, TypeVar +from typing import Mapping, Sequence, Any from .column_object import * from .dataframe_object import DataFrame @@ -11,9 +11,9 @@ from ._types import DType __all__ = [ + "__dataframe_api_version__", "DataFrame", "Column", - "__dataframe_api_version__", "column_from_sequence", "concat", "dataframe_from_dict", diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 06c9a5b7..3880406d 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal, Generic, TypeVar +from typing import Any,NoReturn, Sequence, TYPE_CHECKING, Literal, Generic from ._types import DType diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 759f602f..f2afaf83 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -2,6 +2,7 @@ from typing import Any, Literal, Mapping, Sequence, Union, TYPE_CHECKING, NoReturn + if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy From 5486e22ea916a8e8298dde5e0c3647c3a8494660 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:30:09 +0100 Subject: [PATCH 34/38] introduce Scalar type alias --- .../API_specification/dataframe_api/_types.py | 5 +++- .../dataframe_api/column_object.py | 21 +++++++------- .../dataframe_api/dataframe_object.py | 29 ++++++++++--------- spec/design_topics/python_builtin_types.md | 2 +- 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 2874ba4c..dde7795a 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -20,8 +20,11 @@ ) from enum import Enum +# Type alias: Mypy needs Any, but for readability we need to make clear this +# is a Python scalar (i.e., an instance of `bool`, `int`, `float`, `str`, etc.) 
+Scalar = Any + array = TypeVar("array") -Scalar = TypeVar("Scalar") device = TypeVar("device") DType = TypeVar("DType") SupportsDLPack = TypeVar("SupportsDLPack") diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index f394a7f8..7d8300b8 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: from . import Bool, null + from ._types import Scalar __all__ = ['Column'] @@ -91,7 +92,7 @@ def get_rows(self: Column[DType], indices: Column[Any]) -> Column[DType]: """ ... - def get_value(self, row_number: int) -> Any: + def get_value(self, row_number: int) -> Scalar: """ Select the value at a row number, similar to `ndarray.__getitem__()`. @@ -138,7 +139,7 @@ def sorted_indices( """ ... - def __eq__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Column[Any] | Scalar) -> Column[Bool]: # type: ignore[override] """ Compare for equality. @@ -280,7 +281,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self: Column[Any], other: Column[Any] | Any) -> Column[Any]: + def __add__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: """ Add `other` column or scalar to this column. @@ -296,7 +297,7 @@ def __add__(self: Column[Any], other: Column[Any] | Any) -> Column[Any]: Column """ - def __sub__(self: Column[Any], other: Column[Any] | Any) -> Column[Any]: + def __sub__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: """ Subtract `other` column or scalar from this column. @@ -312,7 +313,7 @@ def __sub__(self: Column[Any], other: Column[Any] | Any) -> Column[Any]: Column """ - def __mul__(self, other: Column[Any] | Any) -> Column[Any]: + def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Multiply `other` column or scalar with this column. @@ -328,7 +329,7 @@ def __mul__(self, other: Column[Any] | Any) -> Column[Any]: Column """ - def __truediv__(self, other: Column[Any] | Any) -> Column[Any]: + def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Divide this column by `other` column or scalar. True division, returns floats. @@ -344,7 +345,7 @@ def __truediv__(self, other: Column[Any] | Any) -> Column[Any]: Column """ - def __floordiv__(self, other: Column[Any] | Any) -> Column[Any]: + def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Floor-divide `other` column or scalar to this column. @@ -360,7 +361,7 @@ def __floordiv__(self, other: Column[Any] | Any) -> Column[Any]: Column """ - def __pow__(self, other: Column[Any] | Any) -> Column[Any]: + def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Raise this column to the power of `other`. @@ -380,7 +381,7 @@ def __pow__(self, other: Column[Any] | Any) -> Column[Any]: Column """ - def __mod__(self, other: Column[Any] | Any) -> Column[Any]: + def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: """ Returns modulus of this column by `other` (`%` operator). @@ -396,7 +397,7 @@ def __mod__(self, other: Column[Any] | Any) -> Column[Any]: Column """ - def __divmod__(self, other: Column[Any] | Any) -> tuple[Column[Any], Column[Any]]: + def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[Any], Column[Any]]: """ Return quotient and remainder of integer division. See `divmod` builtin function. 
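The `Scalar` alias used above is just `Any`, so it documents intent ("this argument or return value is a plain Python scalar") without changing what a type checker accepts. A small sketch with a hypothetical helper follows; `toy_fill_null` is not the standard's `Column.fill_null`, and missing values are represented by `None` purely for illustration.

```python
# Sketch of how the ``Scalar = Any`` alias reads in a signature: purely a
# readability aid, with no runtime or type-checking effect beyond ``Any``.
from typing import Any, List

Scalar = Any  # same alias as in ``_types.py``

def toy_fill_null(values: List[object], fill: Scalar) -> List[object]:
    """Replace missing entries (represented here by ``None``) with ``fill``."""
    return [fill if v is None else v for v in values]

assert toy_fill_null([1, None, 3], 0) == [1, 0, 3]
```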
diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index dbc84ac4..827d0f4c 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -7,6 +7,7 @@ from .column_object import Column from .groupby_object import GroupBy from . import Bool, null + from ._types import Scalar __all__ = ["DataFrame"] @@ -291,7 +292,7 @@ def sorted_indices( """ ... - def __eq__(self, other: DataFrame | Any) -> DataFrame: # type: ignore[override] + def __eq__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[override] """ Compare for equality. @@ -310,7 +311,7 @@ def __eq__(self, other: DataFrame | Any) -> DataFrame: # type: ignore[override] """ ... - def __ne__(self, other: DataFrame | Any) -> DataFrame: # type: ignore[override] + def __ne__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[override] """ Compare for non-equality. @@ -329,7 +330,7 @@ def __ne__(self, other: DataFrame | Any) -> DataFrame: # type: ignore[override] """ ... - def __ge__(self, other: DataFrame | Any) -> DataFrame: + def __ge__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "greater than or equal to" `other`. @@ -346,7 +347,7 @@ def __ge__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __gt__(self, other: DataFrame | Any) -> DataFrame: + def __gt__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "greater than" `other`. @@ -363,7 +364,7 @@ def __gt__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __le__(self, other: DataFrame | Any) -> DataFrame: + def __le__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "less than or equal to" `other`. @@ -380,7 +381,7 @@ def __le__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __lt__(self, other: DataFrame | Any) -> DataFrame: + def __lt__(self, other: DataFrame | Scalar) -> DataFrame: """ Compare for "less than" `other`. @@ -439,7 +440,7 @@ def __or__(self, other: DataFrame | bool) -> DataFrame: If `self` or `other` is not boolean. """ - def __add__(self, other: DataFrame | Any) -> DataFrame: + def __add__(self, other: DataFrame | Scalar) -> DataFrame: """ Add `other` dataframe or scalar to this dataframe. @@ -456,7 +457,7 @@ def __add__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __sub__(self, other: DataFrame | Any) -> DataFrame: + def __sub__(self, other: DataFrame | Scalar) -> DataFrame: """ Subtract `other` dataframe or scalar from this dataframe. @@ -473,7 +474,7 @@ def __sub__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __mul__(self, other: DataFrame | Any) -> DataFrame: + def __mul__(self, other: DataFrame | Scalar) -> DataFrame: """ Multiply `other` dataframe or scalar with this dataframe. @@ -490,7 +491,7 @@ def __mul__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __truediv__(self, other: DataFrame | Any) -> DataFrame: + def __truediv__(self, other: DataFrame | Scalar) -> DataFrame: """ Divide this dataframe by `other` dataframe or scalar. True division, returns floats. @@ -507,7 +508,7 @@ def __truediv__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __floordiv__(self, other: DataFrame | Any) -> DataFrame: + def __floordiv__(self, other: DataFrame | Scalar) -> DataFrame: """ Floor-divide (returns integers) this dataframe by `other` dataframe or scalar. @@ -524,7 +525,7 @@ def __floordiv__(self, other: DataFrame | Any) -> DataFrame: """ ... 
- def __pow__(self, other: DataFrame | Any) -> DataFrame: + def __pow__(self, other: DataFrame | Scalar) -> DataFrame: """ Raise this dataframe to the power of `other`. @@ -545,7 +546,7 @@ def __pow__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __mod__(self, other: DataFrame | Any) -> DataFrame: + def __mod__(self, other: DataFrame | Scalar) -> DataFrame: """ Return modulus of this dataframe by `other` (`%` operator). @@ -562,7 +563,7 @@ def __mod__(self, other: DataFrame | Any) -> DataFrame: """ ... - def __divmod__(self, other: DataFrame | Any) -> tuple[DataFrame, DataFrame]: + def __divmod__(self, other: DataFrame | Scalar) -> tuple[DataFrame, DataFrame]: """ Return quotient and remainder of integer division. See `divmod` builtin function. diff --git a/spec/design_topics/python_builtin_types.md b/spec/design_topics/python_builtin_types.md index 1c78dd9a..567baca3 100644 --- a/spec/design_topics/python_builtin_types.md +++ b/spec/design_topics/python_builtin_types.md @@ -12,7 +12,7 @@ the `float` it is documented to return, in combination with the `__gt__` method ```python class DataFrame: - def __gt__(self, other: DataFrame | Any) -> DataFrame: + def __gt__(self, other: DataFrame | Scalar) -> DataFrame: ... def get_column_by_name(self, name: str, /) -> Column: ... From 6a8a428bfc591bba52480ef466820edd3ca955fd Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:33:24 +0100 Subject: [PATCH 35/38] fixup --- .../API_specification/dataframe_api/groupby_object.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/spec/API_specification/dataframe_api/groupby_object.py b/spec/API_specification/dataframe_api/groupby_object.py index 9e41631a..c020be9d 100644 --- a/spec/API_specification/dataframe_api/groupby_object.py +++ b/spec/API_specification/dataframe_api/groupby_object.py @@ -43,17 +43,10 @@ def median(self, *, skip_nulls: bool = True) -> DataFrame: def mean(self, *, skip_nulls: bool = True) -> DataFrame: ... -<<<<<<< HEAD - def std(self, *, skip_nulls: bool = True) -> DataFrame: + def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFrame: ... - def var(self, *, skip_nulls: bool = True) -> DataFrame: -======= - def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> "DataFrame": - ... - - def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> "DataFrame": ->>>>>>> upstream/main + def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> DataFrame: ... def size(self) -> DataFrame: From e2d3068cc390a77d55de330dcd90e1006cced393 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:34:45 +0100 Subject: [PATCH 36/38] fixup mypy --- spec/API_specification/dataframe_api/column_object.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 7d8300b8..674d6d4f 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -527,26 +527,26 @@ def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Any: Whether to skip null values. """ - def cumulative_max(self) -> Column: + def cumulative_max(self: Column[DType]) -> Column[DType]: """ Reduction returns a Column. Any data type that supports comparisons must be supported. 
The returned value has the same dtype as the column. """ - def cumulative_min(self) -> Column: + def cumulative_min(self: Column[DType]) -> Column[DType]: """ Reduction returns a Column. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def cumulative_sum(self) -> Column: + def cumulative_sum(self: Column[DType]) -> Column[DType]: """ Reduction returns a Column. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def cumulative_prod(self) -> Column: + def cumulative_prod(self: Column[DType]) -> Column[DType]: """ Reduction returns a Column. Must be supported for numerical and datetime data types. The returned value has the same dtype as the @@ -644,7 +644,7 @@ def fill_nan(self: Column[DType], value: float | 'null', /) -> Column[DType]: """ ... - def fill_null(self, value: Scalar, /) -> Column: + def fill_null(self: Column[DType], value: Scalar, /) -> Column[DType]: """ Fill null values with the given fill value. From 59140c28d0d7af73d7dad1a6096315a9b7f57887 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:36:13 +0100 Subject: [PATCH 37/38] reduce diff --- spec/API_specification/dataframe_api/column_object.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 674d6d4f..c4d7656b 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -157,7 +157,7 @@ def __eq__(self, other: Column[Any] | Scalar) -> Column[Bool]: # type: ignore[o Column """ - def __ne__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: # type: ignore[override] + def __ne__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore[override] """ Compare for non-equality. @@ -175,7 +175,7 @@ def __ne__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: # Column """ - def __ge__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: + def __ge__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: """ Compare for "greater than or equal to" `other`. @@ -191,7 +191,7 @@ def __ge__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: Column """ - def __gt__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: + def __gt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: """ Compare for "greater than" `other`. @@ -207,7 +207,7 @@ def __gt__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: Column """ - def __le__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: + def __le__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: """ Compare for "less than or equal to" `other`. @@ -223,7 +223,7 @@ def __le__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: Column """ - def __lt__(self: Column[DType], other: Column[DType] | Any) -> Column[Bool]: + def __lt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: """ Compare for "less than" `other`. 
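Patches 36 and 37 above tighten the generic annotations: the cumulative methods take `self: Column[DType]` and return `Column[DType]`, and the comparisons accept `Column[DType] | Scalar`, so the element type stays visible to type checkers while scalars remain accepted. A hedged sketch of the dtype-preserving pattern follows, assuming a toy generic, list-backed `Column` rather than any particular implementation.

```python
from __future__ import annotations

from itertools import accumulate
from typing import Any, Generic, TypeVar

DType = TypeVar("DType")


class Column(Generic[DType]):
    """Toy generic column; only here to show the dtype-preserving annotations."""

    def __init__(self, values: list[Any]) -> None:
        self._values = values

    def cumulative_sum(self: Column[DType]) -> Column[DType]:
        # Running total; annotating `self` carries the DType parameter through,
        # so a Column[Int64] input is still seen as Column[Int64] on the way out.
        return Column(list(accumulate(self._values)))

    def cumulative_max(self: Column[DType]) -> Column[DType]:
        # Running maximum; same dtype-preservation idea.
        return Column(list(accumulate(self._values, max)))
```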
From 9fd840a2cc2e3070553e6650c6e861dbd1343442 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:37:28 +0100 Subject: [PATCH 38/38] fix return types; --- .../dataframe_api/column_object.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index c4d7656b..ffcaca31 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -443,32 +443,32 @@ def all(self: Column[Bool], *, skip_nulls: bool = True) -> bool: If column is not boolean. """ - def min(self, *, skip_nulls: bool = True) -> Any: + def min(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def max(self, *, skip_nulls: bool = True) -> Any: + def max(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def sum(self, *, skip_nulls: bool = True) -> Any: + def sum(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def prod(self, *, skip_nulls: bool = True) -> Any: + def prod(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical data types. The returned value has the same dtype as the column. """ - def median(self, *, skip_nulls: bool = True) -> Any: + def median(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -476,7 +476,7 @@ def median(self, *, skip_nulls: bool = True) -> Any: dtypes. """ - def mean(self, *, skip_nulls: bool = True) -> Any: + def mean(self, *, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -484,7 +484,7 @@ def mean(self, *, skip_nulls: bool = True) -> Any: dtypes. """ - def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Any: + def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -510,7 +510,7 @@ def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Any: Whether to skip null values. """ - def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Any: + def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and
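For context on the `correction` keyword that `std` and `var` gained above: assuming it follows the array API convention of a degrees-of-freedom adjustment, the sum of squared deviations is divided by `N - correction`, so the default of `1` is Bessel's correction (unbiased sample variance) and `0` gives the population variance. A small self-contained sketch of that semantics, not tied to any dataframe implementation:

```python
def var(values: list[float], *, correction: float = 1) -> float:
    # Divide by N - correction: correction=1 -> sample variance, 0 -> population.
    mean = sum(values) / len(values)
    return sum((v - mean) ** 2 for v in values) / (len(values) - correction)


data = [1.0, 2.0, 3.0, 4.0]
assert abs(var(data) - 5.0 / 3.0) < 1e-12           # N - 1 in the denominator
assert abs(var(data, correction=0) - 1.25) < 1e-12  # N in the denominator
```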