3
3
4
4
from typing import Tuple , Any
5
5
6
- from .dataframe_protocol import Buffer , Column , DataFrame as DataFrameXchg , DtypeKind , DlpackDeviceType
6
+ from .dataframe_protocol import Buffer , Column , ColumnNullType , DataFrame as DataFrameXchg , DtypeKind , DlpackDeviceType
7
7
8
8
import pandas as pd
9
9
import numpy as np
10
10
11
+ _NP_DTYPES = {
12
+ DtypeKind .INT : {8 : np .int8 , 16 : np .int16 , 32 : np .int32 , 64 : np .int64 },
13
+ DtypeKind .UINT : {8 : np .uint8 , 16 : np .uint16 , 32 : np .uint32 , 64 : np .uint64 },
14
+ DtypeKind .FLOAT : {32 : np .float32 , 64 : np .float64 },
15
+ DtypeKind .BOOL : {8 : bool },
16
+ }
11
17
12
18
def from_dataframe (df : DataFrameXchg ,
13
19
allow_copy : bool = True ) -> pd .DataFrame :
@@ -65,10 +71,7 @@ def convert_column_to_ndarray(col : Column) -> Tuple[np.ndarray, Buffer]:
65
71
"""
66
72
Convert an int, uint, float or bool column to a numpy array.
67
73
"""
68
- if col .offset != 0 :
69
- raise NotImplementedError ("column.offset > 0 not handled yet" )
70
-
71
- if col .describe_null [0 ] not in (0 , 1 ):
74
+ if col .describe_null [0 ] not in (ColumnNullType .NON_NULLABLE , ColumnNullType .USE_NAN ):
72
75
raise NotImplementedError ("Null values represented as masks or "
73
76
"sentinel values not handled yet" )
74
77
@@ -80,14 +83,10 @@ def buffer_to_ndarray(_buffer : Buffer, _dtype) -> np.ndarray:
80
83
# Handle the dtype
81
84
kind = _dtype [0 ]
82
85
bitwidth = _dtype [1 ]
83
- if _dtype [ 0 ] not in ( DtypeKind . INT , DtypeKind . UINT , DtypeKind . FLOAT , DtypeKind . BOOL ) :
84
- raise RuntimeError ("Not a boolean, integer or floating-point dtype " )
86
+ if kind not in _NP_DTYPES :
87
+ raise RuntimeError (f"Unsupported data type: { kind } " )
85
88
86
- _ints = {8 : np .int8 , 16 : np .int16 , 32 : np .int32 , 64 : np .int64 }
87
- _uints = {8 : np .uint8 , 16 : np .uint16 , 32 : np .uint32 , 64 : np .uint64 }
88
- _floats = {32 : np .float32 , 64 : np .float64 }
89
- _np_dtypes = {0 : _ints , 1 : _uints , 2 : _floats , 20 : {8 : bool }}
90
- column_dtype = _np_dtypes [kind ][bitwidth ]
89
+ column_dtype = _NP_DTYPES [kind ][bitwidth ]
91
90
92
91
# No DLPack yet, so need to construct a new ndarray from the data pointer
93
92
# and size in the buffer plus the dtype on the column
@@ -124,10 +123,10 @@ def convert_categorical_column(col : Column) -> Tuple[pd.Series, Buffer]:
124
123
cat = pd .Categorical (values , categories = categories , ordered = ordered )
125
124
series = pd .Series (cat )
126
125
null_kind = col .describe_null [0 ]
127
- if null_kind == 2 : # sentinel value
126
+ if null_kind == ColumnNullType . USE_SENTINEL : # sentinel value
128
127
sentinel = col .describe_null [1 ]
129
128
series [codes == sentinel ] = np .nan
130
- else :
129
+ elif null_kind != ColumnNullType . NON_NULLABLE :
131
130
raise NotImplementedError ("Only categorical columns with sentinel "
132
131
"value supported at the moment" )
133
132
@@ -164,16 +163,16 @@ def convert_string_column(col : Column) -> Tuple[np.ndarray, dict]:
164
163
str_list = []
165
164
for i in range (obuf .size - 1 ):
166
165
# Check for missing values
167
- if null_kind == 3 : # bit mask
168
- v = mbuf [i / 8 ]
166
+ if null_kind == ColumnNullType . USE_BITMASK :
167
+ v = mbuf [i // 8 ]
169
168
if null_value == 1 :
170
169
v = ~ v
171
170
172
- if v & (1 << (i % 8 )):
171
+ if v & (1 << (i % 8 )):
173
172
str_list .append (np .nan )
174
173
continue
175
174
176
- elif null_kind == 4 and mbuf [i ] == null_value : # byte mask
175
+ elif null_kind == ColumnNullType . USE_BYTEMASK and mbuf [i ] == null_value :
177
176
str_list .append (np .nan )
178
177
continue
179
178
@@ -268,8 +267,7 @@ def __init__(self, column : pd.Series,
268
267
Series/ndarray for now.
269
268
"""
270
269
if not isinstance (column , pd .Series ):
271
- raise NotImplementedError ("Columns of type {} not handled "
272
- "yet" .format (type (column )))
270
+ raise NotImplementedError (f"Columns of type { type (column )} not handled yet" )
273
271
274
272
# Store the column as a private attribute
275
273
self ._col = column
0 commit comments