1
1
from typing import Tuple , Optional , Dict , Any , Iterable , Sequence , TypedDict
2
2
import enum
3
+ from abc import ABC , abstractmethod
3
4
4
5
class DlpackDeviceType (enum .IntEnum ):
5
6
CPU = 1
@@ -20,7 +21,7 @@ class DtypeKind(enum.IntEnum):
20
21
DATETIME = 22
21
22
CATEGORICAL = 23
22
23
23
- class ColumnNullType :
24
+ class ColumnNullType ( enum . IntEnum ) :
24
25
NON_NULLABLE = 0
25
26
USE_NAN = 1
26
27
USE_SENTINEL = 2
@@ -42,7 +43,7 @@ class ColumnBuffers(TypedDict):
42
43
# an associated offsets buffer
43
44
44
45
45
- class Buffer :
46
+ class Buffer ( ABC ) :
46
47
"""
47
48
Data in the buffer is guaranteed to be contiguous in memory.
48
49
Note that there is no dtype attribute present, a buffer can be thought of
@@ -56,19 +57,22 @@ class Buffer:
56
57
"""
57
58
58
59
@property
@abstractmethod
def bufsize(self) -> int:
    """Size of the buffer, expressed in bytes.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
64
66
65
67
@property
@abstractmethod
def ptr(self) -> int:
    """Integer-valued pointer to the start of the buffer.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
71
74
75
+ @abstractmethod
72
76
def __dlpack__ (self ):
73
77
"""
74
78
Produce DLPack capsule (see array API standard).
@@ -80,6 +84,7 @@ def __dlpack__(self):
80
84
"""
81
85
raise NotImplementedError ("__dlpack__" )
82
86
87
+ @abstractmethod
83
88
def __dlpack_device__ (self ) -> Tuple [DlpackDeviceType , int ]:
84
89
"""
85
90
Device type and device ID for where the data in the buffer resides.
@@ -89,7 +94,7 @@ def __dlpack_device__(self) -> Tuple[DlpackDeviceType, int]:
89
94
pass
90
95
91
96
92
- class Column :
97
+ class Column ( ABC ) :
93
98
"""
94
99
A column object, with only the methods and properties required by the
95
100
interchange protocol defined.
@@ -127,6 +132,7 @@ class Column:
127
132
"""
128
133
129
134
@property
135
+ @abstractmethod
130
136
def size (self ) -> Optional [int ]:
131
137
"""
132
138
Size of the column, in elements.
@@ -136,6 +142,7 @@ def size(self) -> Optional[int]:
136
142
pass
137
143
138
144
@property
145
+ @abstractmethod
139
146
def offset (self ) -> int :
140
147
"""
141
148
Offset of first element.
@@ -146,6 +153,7 @@ def offset(self) -> int:
146
153
pass
147
154
148
155
@property
156
+ @abstractmethod
149
157
def dtype (self ) -> Tuple [DtypeKind , int , str , str ]:
150
158
"""
151
159
Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.
@@ -175,6 +183,7 @@ def dtype(self) -> Tuple[DtypeKind, int, str, str]:
175
183
pass
176
184
177
185
@property
186
+ @abstractmethod
178
187
def describe_categorical (self ) -> Tuple [bool , bool , dict ]:
179
188
"""
180
189
If the dtype is categorical, there are two options:
@@ -194,6 +203,7 @@ def describe_categorical(self) -> Tuple[bool, bool, dict]:
194
203
pass
195
204
196
205
@property
206
+ @abstractmethod
197
207
def describe_null (self ) -> Tuple [ColumnNullType , Any ]:
198
208
"""
199
209
Return the missing value (or "null") representation the column dtype
@@ -205,6 +215,7 @@ def describe_null(self) -> Tuple[ColumnNullType, Any]:
205
215
pass
206
216
207
217
@property
218
+ @abstractmethod
208
219
def null_count (self ) -> Optional [int ]:
209
220
"""
210
221
Number of null elements, if known.
@@ -213,25 +224,29 @@ def null_count(self) -> Optional[int]:
213
224
pass
214
225
215
226
@property
@abstractmethod
def metadata(self) -> Dict[str, Any]:
    """Column-level metadata.

    Refer to `DataFrame.metadata` for further details.
    """
    ...
221
233
234
@abstractmethod
def num_chunks(self) -> int:
    """Number of chunks that make up the column.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
227
240
241
@abstractmethod
def get_chunks(self, n_chunks: Optional[int] = None) -> Iterable["Column"]:
    """Iterate over the chunks of the column.

    The meaning of ``n_chunks`` is described under `DataFrame.get_chunks`.
    """
    ...
234
248
249
+ @abstractmethod
235
250
def get_buffers (self ) -> ColumnBuffers :
236
251
"""
237
252
Return a dictionary containing the underlying buffers.
@@ -261,7 +276,7 @@ def get_buffers(self) -> ColumnBuffers:
261
276
# pass
262
277
263
278
264
- class DataFrame :
279
+ class DataFrame ( ABC ) :
265
280
"""
266
281
A data frame class, with only the methods required by the interchange
267
282
protocol defined.
@@ -276,6 +291,7 @@ class DataFrame:
276
291
version = 0 # version of the protocol
277
292
278
293
@property
294
+ @abstractmethod
279
295
def metadata (self ) -> Dict [str , Any ]:
280
296
"""
281
297
The metadata for the data frame, as a dictionary with string keys. The
@@ -288,12 +304,14 @@ def metadata(self) -> Dict[str, Any]:
288
304
"""
289
305
pass
290
306
307
@abstractmethod
def num_columns(self) -> int:
    """Number of columns held by the DataFrame.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
296
313
314
+ @abstractmethod
297
315
def num_rows (self ) -> Optional [int ]:
298
316
# TODO: not happy with Optional, but need to flag it may be expensive
299
317
# why include it if it may be None - what do we expect consumers
@@ -303,48 +321,56 @@ def num_rows(self) -> Optional[int]:
303
321
"""
304
322
pass
305
323
324
@abstractmethod
def num_chunks(self) -> int:
    """Number of chunks that make up the DataFrame.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
311
330
331
@abstractmethod
def column_names(self) -> Iterable[str]:
    """Iterate over the names of the columns.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
317
337
338
@abstractmethod
def get_column(self, i: int) -> Column:
    """Fetch the column located at position ``i``.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
323
344
345
@abstractmethod
def get_column_by_name(self, name: str) -> Column:
    """Fetch the column whose name equals ``name``.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
329
351
352
@abstractmethod
def get_columns(self) -> Iterable[Column]:
    """Iterate over the columns.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
335
358
359
@abstractmethod
def select_columns(self, indices: Sequence[int]) -> "DataFrame":
    """Build a new DataFrame from the subset of columns picked by index.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
341
365
366
@abstractmethod
def select_columns_by_name(self, names: Sequence[str]) -> "DataFrame":
    """Build a new DataFrame from the subset of columns picked by name.

    Declared abstract: this stub has no implementation of its own.
    """
    ...
347
372
373
+ @abstractmethod
348
374
def get_chunks (self , n_chunks : Optional [int ] = None ) -> Iterable ["DataFrame" ]:
349
375
"""
350
376
Return an iterator yielding the chunks.
0 commit comments