3
3
"""
4
4
from __future__ import annotations
5
5
6
- from typing import Mapping , Sequence , Any
6
+ from typing import Mapping , Sequence , Any , Literal
7
7
8
- from .column_object import *
8
+ from .expression_object import *
9
9
from .dataframe_object import DataFrame
10
10
from .groupby_object import *
11
11
from ._types import DType
12
12
13
13
__all__ = [
14
14
"__dataframe_api_version__" ,
15
15
"DataFrame" ,
16
- "Column" ,
17
- "column_from_sequence" ,
18
- "column_from_1d_array" ,
16
+ "col" ,
19
17
"concat" ,
20
- "dataframe_from_dict" ,
18
+ "sorted_indices" ,
19
+ "unique_indices" ,
21
20
"dataframe_from_2d_array" ,
22
21
"is_null" ,
23
22
"null" ,
43
42
implementation of the dataframe API standard.
44
43
"""
45
44
45
+ def col (name : str ) -> Expression :
46
+ """
47
+ Instantiate an Expression which selects given column by name.
48
+
49
+ For example, to select column 'species' and then use it to filter
50
+ a DataFrame, you could do:
51
+
52
+ .. code-block:: python
53
+
54
+ df: DataFrame
55
+ namespace = df.__dataframe_namespace__()
56
+ df.get_rows_by_mask(namespace.col('species') == 'setosa')
57
+ """
58
+ ...
59
+
46
60
def concat (dataframes : Sequence [DataFrame ]) -> DataFrame :
47
61
"""
48
62
Concatenate DataFrames vertically.
@@ -63,104 +77,116 @@ def concat(dataframes: Sequence[DataFrame]) -> DataFrame:
63
77
"""
64
78
...
65
79
66
- def column_from_sequence ( sequence : Sequence [ Any ], * , dtype : Any , name : str = '' , api_version : str | None = None ) -> Column [ Any ] :
80
+ def any_rowwise ( keys : list [ str ] | None = None , * , skip_nulls : bool = True ) -> Expression :
67
81
"""
68
- Construct Column from sequence of elements.
82
+ Reduction returns an Expression.
83
+
84
+ Differs from ``DataFrame.any`` in that the reduction happens
85
+ for each row, rather than for each column.
69
86
70
87
Parameters
71
88
----------
72
- sequence : Sequence[object]
73
- Sequence of elements. Each element must be of the specified
74
- ``dtype``, the corresponding Python builtin scalar type, or
75
- coercible to that Python scalar type.
76
- name : str, optional
77
- Name of column.
78
- dtype : DType
79
- Dtype of result. Must be specified.
80
- api_version: str | None
81
- A string representing the version of the dataframe API specification
82
- in ``'YYYY.MM'`` form, for example, ``'2023.04'``.
83
- If it is ``None``, it should return an object corresponding to
84
- latest version of the dataframe API specification. If the given
85
- version is invalid or not implemented for the given module, an
86
- error should be raised. Default: ``None``.
89
+ keys : list[str], optional
90
+ Column names to consider. If `None`, all columns are considered.
87
91
88
- Returns
89
- -------
90
- Column
92
+ Raises
93
+ ------
94
+ ValueError
95
+ If any of the DataFrame's columns is not boolean.
91
96
"""
92
97
...
93
98
94
- def dataframe_from_dict ( data : Mapping [str , Column [ Any ]] , * , api_version : str | None = None ) -> DataFrame :
99
+ def all_rowwise ( keys : list [str ] | None = None , * , skip_nulls : bool = True ) -> Expression :
95
100
"""
96
- Construct DataFrame from map of column names to Columns.
101
+ Reduction returns an Expression.
102
+
103
+ Differs from ``DataFrame.all`` in that the reduction happens
104
+ for each row, rather than for each column.
97
105
98
106
Parameters
99
107
----------
100
- data : Mapping[str, Column]
101
- Column must be of the corresponding type of the DataFrame.
102
- For example, it is only supported to build a ``LibraryXDataFrame`` using
103
- ``LibraryXColumn`` instances.
104
- api_version: str | None
105
- A string representing the version of the dataframe API specification
106
- in ``'YYYY.MM'`` form, for example, ``'2023.04'``.
107
- If it is ``None``, it should return an object corresponding to
108
- latest version of the dataframe API specification. If the given
109
- version is invalid or not implemented for the given module, an
110
- error should be raised. Default: ``None``.
108
+ keys : list[str], optional
109
+ Column names to consider. If `None`, all columns are considered.
111
110
112
- Returns
113
- -------
114
- DataFrame
115
-
116
111
Raises
117
112
------
118
113
ValueError
119
- If any of the columns already has a name, and the corresponding key
120
- in `data` doesn't match.
121
-
114
+ If any of the DataFrame's columns is not boolean.
122
115
"""
123
116
...
124
117
118
+ def sorted_indices (
119
+ keys : str | list [str ] | None = None ,
120
+ * ,
121
+ ascending : Sequence [bool ] | bool = True ,
122
+ nulls_position : Literal ['first' , 'last' ] = 'last' ,
123
+ ) -> Expression :
124
+ """
125
+ Return row numbers which would sort according to given columns.
126
+
127
+ If you need to sort the DataFrame, use :meth:`DataFrame.sort`.
125
128
126
- def column_from_1d_array (array : Any , * , dtype : Any , name : str = '' , api_version : str | None = None ) -> Column [Any ]:
129
+ Parameters
130
+ ----------
131
+ keys : str | list[str], optional
132
+ Names of columns to sort by.
133
+ If `None`, sort by all columns.
134
+ ascending : Sequence[bool] or bool
135
+ If `True`, sort by all keys in ascending order.
136
+ If `False`, sort by all keys in descending order.
137
+ If a sequence, it must be the same length as `keys`,
138
+ and determines the direction with which to use each
139
+ key to sort by.
140
+ nulls_position : ``{'first', 'last'}``
141
+ Whether null values should be placed at the beginning
142
+ or at the end of the result.
143
+ Note that the position of NaNs is unspecified and may
144
+ vary based on the implementation.
145
+
146
+ Returns
147
+ -------
148
+ Expression
149
+
150
+ Raises
151
+ ------
152
+ ValueError
153
+ If `keys` and `ascending` are sequences of different lengths.
127
154
"""
128
- Construct Column from 1D array .
155
+ ...
129
156
130
- See `dataframe_from_2d_array` for related 2D function.
131
157
132
- Only Array-API-compliant 1D arrays are supported.
133
- Cross-kind casting is undefined and may vary across implementations.
134
- Downcasting is disallowed .
158
+ def unique_indices ( keys : str | list [ str ] | None = None , * , skip_nulls : bool = True ) -> Expression :
159
+ """
160
+ Return indices corresponding to unique values across selected columns.
135
161
136
162
Parameters
137
163
----------
138
- array : array
139
- array-API compliant 1D array
140
- name : str, optional
141
- Name to give columns.
142
- dtype : DType
143
- Dtype of column.
144
- api_version: str | None
145
- A string representing the version of the dataframe API specification
146
- in ``'YYYY.MM'`` form, for example, ``'2023.04'``.
147
- If it is ``None``, it should return an object corresponding to
148
- latest version of the dataframe API specification. If the given
149
- version is invalid or not implemented for the given module, an
150
- error should be raised. Default: ``None``.
164
+ keys : str | list[str], optional
165
+ Column names to consider when finding unique values.
166
+ If `None`, all columns are considered.
151
167
152
168
Returns
153
169
-------
154
- Column
170
+ Expression
171
+ Indices corresponding to unique values.
172
+
173
+ Notes
174
+ -----
175
+ There are no ordering guarantees. In particular, if there are multiple
176
+ indices corresponding to the same unique value(s), there is no guarantee
177
+ about which one will appear in the result.
178
+ If the original column(s) contain multiple `'NaN'` values, then
179
+ only a single index corresponding to those values will be returned.
180
+ Likewise for null values (if ``skip_nulls=False``).
181
+ To get the unique values, you can do ``df.get_rows(df.unique_indices(keys))``.
155
182
"""
156
183
...
157
184
158
- def dataframe_from_2d_array (array : Any , * , names : Sequence [str ], dtypes : Mapping [str , Any ], api_version : str | None = None ) -> DataFrame :
185
+
186
+ def dataframe_from_2d_array (array : Any , * , names : Sequence [str ], dtypes : Mapping [str , Any ]) -> DataFrame :
159
187
"""
160
188
Construct DataFrame from 2D array.
161
189
162
- See `column_from_1d_array` for related 1D function.
163
-
164
190
Only Array-API-compliant 2D arrays are supported.
165
191
Cross-kind casting is undefined and may vary across implementations.
166
192
Downcasting is disallowed.
0 commit comments