diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py index 1e7d57b4..0c34c2f3 100644 --- a/spec/API_specification/dataframe_api/__init__.py +++ b/spec/API_specification/dataframe_api/__init__.py @@ -3,12 +3,15 @@ """ from __future__ import annotations -from typing import Mapping, Sequence, Any +from typing import Mapping, Sequence, Any, TYPE_CHECKING from .column_object import * from .dataframe_object import DataFrame from .groupby_object import * -from ._types import DType +from .dtypes import * + +if TYPE_CHECKING: + from ._types import DType __all__ = [ "__dataframe_api_version__", @@ -63,7 +66,7 @@ def concat(dataframes: Sequence[DataFrame]) -> DataFrame: """ ... -def column_from_sequence(sequence: Sequence[Any], *, dtype: Any, name: str = '', api_version: str | None = None) -> Column[Any]: +def column_from_sequence(sequence: Sequence[Any], *, dtype: DType, name: str = '', api_version: str | None = None) -> Column: """ Construct Column from sequence of elements. @@ -91,7 +94,7 @@ def column_from_sequence(sequence: Sequence[Any], *, dtype: Any, name: str = '', """ ... -def dataframe_from_dict(data: Mapping[str, Column[Any]], *, api_version: str | None = None) -> DataFrame: +def dataframe_from_dict(data: Mapping[str, Column], *, api_version: str | None = None) -> DataFrame: """ Construct DataFrame from map of column names to Columns. @@ -123,7 +126,7 @@ def dataframe_from_dict(data: Mapping[str, Column[Any]], *, api_version: str | N ... -def column_from_1d_array(array: Any, *, dtype: Any, name: str = '', api_version: str | None = None) -> Column[Any]: +def column_from_1d_array(array: Any, *, dtype: DType, name: str = '', api_version: str | None = None) -> Column: """ Construct Column from 1D array. @@ -232,51 +235,13 @@ def is_null(value: object, /) -> bool: """ -########## -# Dtypes # -########## - -class Int64: - """Integer type with 64 bits of precision.""" - -class Int32: - """Integer type with 32 bits of precision.""" - -class Int16: - """Integer type with 16 bits of precision.""" - -class Int8: - """Integer type with 8 bits of precision.""" - -class UInt64: - """Unsigned integer type with 64 bits of precision.""" - -class UInt32: - """Unsigned integer type with 32 bits of precision.""" - -class UInt16: - """Unsigned integer type with 16 bits of precision.""" - -class UInt8: - """Unsigned integer type with 8 bits of precision.""" - -class Float64: - """Floating point type with 64 bits of precision.""" - -class Float32: - """Floating point type with 32 bits of precision.""" - -class Bool: - """Boolean type with 8 bits of precision.""" - - -def is_dtype(dtype: Any, kind: str | tuple[str, ...]) -> bool: +def is_dtype(dtype: DType, kind: str | tuple[str, ...]) -> bool: """ Returns a boolean indicating whether a provided dtype is of a specified data type “kind”. Parameters ---------- - dtype: Any + dtype: DType The input dtype. kind: str data type kind. diff --git a/spec/API_specification/dataframe_api/_types.py b/spec/API_specification/dataframe_api/_types.py index 2b6d7d08..2b43e115 100644 --- a/spec/API_specification/dataframe_api/_types.py +++ b/spec/API_specification/dataframe_api/_types.py @@ -1,8 +1,5 @@ """ Types for type annotations used in the dataframe API standard. - -The type variables should be replaced with the actual types for a given -library, e.g., for Pandas TypeVar('DataFrame') would be replaced with pd.DataFrame. """ from __future__ import annotations @@ -14,12 +11,28 @@ Optional, Sequence, Tuple, - TypeVar, Union, - Protocol, + TYPE_CHECKING, ) from enum import Enum +if TYPE_CHECKING: + from .dtypes import ( + Bool, + Float64, + Float32, + Int64, + Int32, + Int16, + Int8, + UInt64, + UInt32, + UInt16, + UInt8, + ) + + DType = Union[Bool, Float64, Float32, Int64, Int32, Int16, Int8, UInt64, UInt32, UInt16, UInt8] + # Type alias: Mypy needs Any, but for readability we need to make clear this # is a Python scalar (i.e., an instance of `bool`, `int`, `float`, `str`, etc.) Scalar = Any @@ -27,26 +40,6 @@ # It is not valid as a type. NullType = Any -array = TypeVar("array") -device = TypeVar("device") -DType = TypeVar("DType") -SupportsDLPack = TypeVar("SupportsDLPack") -SupportsBufferProtocol = TypeVar("SupportsBufferProtocol") -PyCapsule = TypeVar("PyCapsule") -# ellipsis cannot actually be imported from anywhere, so include a dummy here -# to keep pyflakes happy. https://github.com/python/typeshed/issues/3556 -ellipsis = TypeVar("ellipsis") - -_T_co = TypeVar("_T_co", covariant=True) - - -class NestedSequence(Protocol[_T_co]): - def __getitem__(self, key: int, /) -> Union[_T_co, NestedSequence[_T_co]]: - ... - - def __len__(self, /) -> int: - ... - __all__ = [ "Any", diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py index 960462ff..7b0f552a 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/column_object.py @@ -2,17 +2,14 @@ from typing import Any,NoReturn, TYPE_CHECKING, Literal, Generic -from ._types import DType - if TYPE_CHECKING: - from . import Bool - from ._types import NullType, Scalar + from ._types import NullType, Scalar, DType __all__ = ['Column'] -class Column(Generic[DType]): +class Column: """ Column object @@ -73,21 +70,21 @@ def dtype(self) -> Any: Return data type of column. """ - def get_rows(self: Column[DType], indices: Column[Any]) -> Column[DType]: + def get_rows(self: Column, indices: Column) -> Column: """ Select a subset of rows, similar to `ndarray.take`. Parameters ---------- - indices : Column[int] + indices : Column Positions of rows to select. """ ... def slice_rows( - self: Column[DType], start: int | None, stop: int | None, step: int | None - ) -> Column[DType]: + self: Column, start: int | None, stop: int | None, step: int | None + ) -> Column: """ Select a subset of rows corresponding to a slice. @@ -104,13 +101,13 @@ def slice_rows( ... - def filter(self: Column[DType], mask: Column[Bool]) -> Column[DType]: + def filter(self: Column, mask: Column) -> Column: """ Select a subset of rows corresponding to a mask. Parameters ---------- - mask : Column[bool] + mask : Column Returns ------- @@ -146,7 +143,7 @@ def sort( *, ascending: bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[DType]: + ) -> Column: """ Sort column. @@ -175,7 +172,7 @@ def sorted_indices( *, ascending: bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[Any]: + ) -> Column: """ Return row numbers which would sort column. @@ -194,11 +191,11 @@ def sorted_indices( Returns ------- - Column[int] + Column """ ... - def __eq__(self, other: Column[Any] | Scalar) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Column | Scalar) -> Column: # type: ignore[override] """ Compare for equality. @@ -216,7 +213,7 @@ def __eq__(self, other: Column[Any] | Scalar) -> Column[Bool]: # type: ignore[o Column """ - def __ne__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore[override] + def __ne__(self: Column, other: Column | Scalar) -> Column: # type: ignore[override] """ Compare for non-equality. @@ -234,7 +231,7 @@ def __ne__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __ge__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __ge__(self: Column, other: Column | Scalar) -> Column: """ Compare for "greater than or equal to" `other`. @@ -250,7 +247,7 @@ def __ge__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __gt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __gt__(self: Column, other: Column | Scalar) -> Column: """ Compare for "greater than" `other`. @@ -266,7 +263,7 @@ def __gt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __le__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __le__(self: Column, other: Column | Scalar) -> Column: """ Compare for "less than or equal to" `other`. @@ -282,7 +279,7 @@ def __le__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __lt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __lt__(self: Column, other: Column | Scalar) -> Column: """ Compare for "less than" `other`. @@ -298,7 +295,7 @@ def __lt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Column """ - def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: + def __and__(self: Column, other: Column | bool) -> Column: """ Apply logical 'and' to `other` Column (or scalar) and this Column. @@ -306,7 +303,7 @@ def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: Parameters ---------- - other : Column[bool] or bool + other : Column or bool If Column, must have same length. Returns @@ -319,7 +316,7 @@ def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: + def __or__(self: Column, other: Column | bool) -> Column: """ Apply logical 'or' to `other` Column (or scalar) and this column. @@ -327,12 +324,12 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: Parameters ---------- - other : Column[bool] or Scalar + other : Column or Scalar If Column, must have same length. Returns ------- - Column[bool] + Column Raises ------ @@ -340,7 +337,7 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: + def __add__(self: Column, other: Column | Scalar) -> Column: """ Add `other` column or scalar to this column. @@ -356,7 +353,7 @@ def __add__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __sub__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: + def __sub__(self: Column, other: Column | Scalar) -> Column: """ Subtract `other` column or scalar from this column. @@ -372,7 +369,7 @@ def __sub__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __mul__(self, other: Column | Scalar) -> Column: """ Multiply `other` column or scalar with this column. @@ -388,7 +385,7 @@ def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __truediv__(self, other: Column | Scalar) -> Column: """ Divide this column by `other` column or scalar. True division, returns floats. @@ -404,7 +401,7 @@ def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __floordiv__(self, other: Column | Scalar) -> Column: """ Floor-divide `other` column or scalar to this column. @@ -420,7 +417,7 @@ def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __pow__(self, other: Column | Scalar) -> Column: """ Raise this column to the power of `other`. @@ -440,7 +437,7 @@ def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __mod__(self, other: Column | Scalar) -> Column: """ Returns modulus of this column by `other` (`%` operator). @@ -456,7 +453,7 @@ def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: Column """ - def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[Any], Column[Any]]: + def __divmod__(self, other: Column | Scalar) -> tuple[Column, Column]: """ Return quotient and remainder of integer division. See `divmod` builtin function. @@ -472,7 +469,7 @@ def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[Any], Column[A Column """ - def __invert__(self: Column[Bool]) -> Column[Bool]: + def __invert__(self: Column) -> Column: """ Invert truthiness of (boolean) elements. @@ -482,7 +479,7 @@ def __invert__(self: Column[Bool]) -> Column[Bool]: If any of the Column's columns is not boolean. """ - def any(self: Column[Bool], *, skip_nulls: bool = True) -> bool | NullType: + def any(self: Column, *, skip_nulls: bool = True) -> bool | NullType: """ Reduction returns a bool. @@ -492,7 +489,7 @@ def any(self: Column[Bool], *, skip_nulls: bool = True) -> bool | NullType: If column is not boolean. """ - def all(self: Column[Bool], *, skip_nulls: bool = True) -> bool | NullType: + def all(self: Column, *, skip_nulls: bool = True) -> bool | NullType: """ Reduction returns a bool. @@ -586,33 +583,33 @@ def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar Whether to skip null values. """ - def cumulative_max(self: Column[DType]) -> Column[DType]: + def cumulative_max(self: Column) -> Column: """ Reduction returns a Column. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def cumulative_min(self: Column[DType]) -> Column[DType]: + def cumulative_min(self: Column) -> Column: """ Reduction returns a Column. Any data type that supports comparisons must be supported. The returned value has the same dtype as the column. """ - def cumulative_sum(self: Column[DType]) -> Column[DType]: + def cumulative_sum(self: Column) -> Column: """ Reduction returns a Column. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def cumulative_prod(self: Column[DType]) -> Column[DType]: + def cumulative_prod(self: Column) -> Column: """ Reduction returns a Column. Must be supported for numerical and datetime data types. The returned value has the same dtype as the column. """ - def is_null(self) -> Column[Bool]: + def is_null(self) -> Column: """ Check for 'missing' or 'null' entries. @@ -631,7 +628,7 @@ def is_null(self) -> Column[Bool]: but note that the Standard makes no guarantees about them. """ - def is_nan(self) -> Column[Bool]: + def is_nan(self) -> Column: """ Check for nan entries. @@ -650,7 +647,7 @@ def is_nan(self) -> Column[Bool]: In particular, does not check for `np.timedelta64('NaT')`. """ - def is_in(self: Column[DType], values: Column[DType]) -> Column[Bool]: + def is_in(self: Column, values: Column) -> Column: """ Indicate whether the value at each row matches any value in `values`. @@ -665,16 +662,16 @@ def is_in(self: Column[DType], values: Column[DType]) -> Column[Bool]: Returns ------- - Column[bool] + Column """ - def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: + def unique_indices(self, *, skip_nulls: bool = True) -> Column: """ Return indices corresponding to unique values in Column. Returns ------- - Column[int] + Column Indices corresponding to unique values. Notes @@ -689,7 +686,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: """ ... - def fill_nan(self: Column[DType], value: float | NullType, /) -> Column[DType]: + def fill_nan(self: Column, value: float | NullType, /) -> Column: """ Fill floating point ``nan`` values with the given fill value. @@ -703,7 +700,7 @@ def fill_nan(self: Column[DType], value: float | NullType, /) -> Column[DType]: """ ... - def fill_null(self: Column[DType], value: Scalar, /) -> Column[DType]: + def fill_null(self: Column, value: Scalar, /) -> Column: """ Fill null values with the given fill value. @@ -716,7 +713,7 @@ def fill_null(self: Column[DType], value: Scalar, /) -> Column[DType]: """ ... - def to_array_object(self, dtype: Any) -> Any: + def to_array_object(self, dtype: DType) -> Any: """ Convert to array-API-compliant object. @@ -751,7 +748,7 @@ def to_array_object(self, dtype: Any) -> Any: ``array-api-compat`` package to convert it to a Standard-compliant array. """ - def rename(self, name: str) -> Column[DType]: + def rename(self, name: str) -> Column: """ Rename column. diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 9a727cf3..1994c674 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -6,8 +6,7 @@ if TYPE_CHECKING: from .column_object import Column from .groupby_object import GroupBy - from . import Bool - from ._types import NullType, Scalar + from ._types import NullType, Scalar, DType __all__ = ["DataFrame"] @@ -90,7 +89,7 @@ def group_by(self, keys: str | list[str], /) -> GroupBy: """ ... - def get_column_by_name(self, name: str, /) -> Column[Any]: + def get_column_by_name(self, name: str, /) -> Column: """ Select a column by name. @@ -128,13 +127,13 @@ def select(self, names: Sequence[str], /) -> DataFrame: """ ... - def get_rows(self, indices: Column[Any]) -> DataFrame: + def get_rows(self, indices: Column) -> DataFrame: """ Select a subset of rows, similar to `ndarray.take`. Parameters ---------- - indices : Column[int] + indices : Column Positions of rows to select. Returns @@ -161,13 +160,13 @@ def slice_rows( """ ... - def filter(self, mask: Column[Bool]) -> DataFrame: + def filter(self, mask: Column) -> DataFrame: """ Select a subset of rows corresponding to a mask. Parameters ---------- - mask : Column[bool] + mask : Column Returns ------- @@ -180,7 +179,7 @@ def filter(self, mask: Column[Bool]) -> DataFrame: """ ... - def insert_column(self, column: Column[Any]) -> DataFrame: + def insert_column(self, column: Column) -> DataFrame: """ Insert column into DataFrame at rightmost location. @@ -209,7 +208,7 @@ def insert_column(self, column: Column[Any]) -> DataFrame: """ ... - def update_columns(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataFrame: + def update_columns(self, columns: Column | Sequence[Column], /) -> DataFrame: """ Update values in existing column(s) from Dataframe. @@ -336,7 +335,7 @@ def sorted_indices( *, ascending: Sequence[bool] | bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[Any]: + ) -> Column: """ Return row numbers which would sort according to given columns. @@ -361,7 +360,7 @@ def sorted_indices( Returns ------- - Column[int] + Column Raises ------ @@ -687,7 +686,7 @@ def all(self, *, skip_nulls: bool = True) -> DataFrame: """ ... - def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + def any_rowwise(self, *, skip_nulls: bool = True) -> Column: """ Reduction returns a Column. @@ -701,7 +700,7 @@ def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: """ ... - def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: + def all_rowwise(self, *, skip_nulls: bool = True) -> Column: """ Reduction returns a Column. @@ -821,7 +820,7 @@ def is_nan(self) -> DataFrame: """ ... - def unique_indices(self, keys: str | list[str] | None = None, *, skip_nulls: bool = True) -> Column[int]: + def unique_indices(self, keys: str | list[str] | None = None, *, skip_nulls: bool = True) -> Column: """ Return indices corresponding to unique values across selected columns. @@ -833,7 +832,7 @@ def unique_indices(self, keys: str | list[str] | None = None, *, skip_nulls: boo Returns ------- - Column[int] + Column Indices corresponding to unique values. Notes @@ -897,7 +896,7 @@ def fill_null( """ ... - def to_array_object(self, dtype: Any) -> Any: + def to_array_object(self, dtype: DType) -> Any: """ Convert to array-API-compliant object. diff --git a/spec/API_specification/dataframe_api/dtypes.py b/spec/API_specification/dataframe_api/dtypes.py new file mode 100644 index 00000000..c984542f --- /dev/null +++ b/spec/API_specification/dataframe_api/dtypes.py @@ -0,0 +1,33 @@ +class Int64: + """Integer type with 64 bits of precision.""" + +class Int32: + """Integer type with 32 bits of precision.""" + +class Int16: + """Integer type with 16 bits of precision.""" + +class Int8: + """Integer type with 8 bits of precision.""" + +class UInt64: + """Unsigned integer type with 64 bits of precision.""" + +class UInt32: + """Unsigned integer type with 32 bits of precision.""" + +class UInt16: + """Unsigned integer type with 16 bits of precision.""" + +class UInt8: + """Unsigned integer type with 8 bits of precision.""" + +class Float64: + """Floating point type with 64 bits of precision.""" + +class Float32: + """Floating point type with 32 bits of precision.""" + +class Bool: + """Boolean type with 8 bits of precision.""" +