@@ -29,33 +29,34 @@ def _from_dataframe(df : DataFrameXchg) -> pd.DataFrame:
29
29
only Pandas. Later, we need to implement/test support for categoricals,
30
30
bit/byte masks, chunk handling, etc.
31
31
"""
32
- # Check number of chunks, if there's more than one we need to iterate
33
- if df .num_chunks () > 1 :
34
- raise NotImplementedError
35
-
36
- # We need a dict of columns here, with each column being a numpy array (at
37
- # least for now, deal with non-numpy dtypes later).
38
- columns = dict ()
39
32
_buffers = [] # hold on to buffers, keeps memory alive
40
- for name in df .column_names ():
41
- if not isinstance (name , str ):
42
- raise ValueError (f"Column { name } is not a string" )
43
- if name in columns :
44
- raise ValueError (f"Column { name } is not unique" )
45
- col = df .get_column_by_name (name )
46
- if col .dtype [0 ] in (DtypeKind .INT , DtypeKind .UINT , DtypeKind .FLOAT , DtypeKind .BOOL ):
47
- # Simple numerical or bool dtype, turn into numpy array
48
- columns [name ], _buf = convert_column_to_ndarray (col )
49
- elif col .dtype [0 ] == DtypeKind .CATEGORICAL :
50
- columns [name ], _buf = convert_categorical_column (col )
51
- elif col .dtype [0 ] == DtypeKind .STRING :
52
- columns [name ], _buf = convert_string_column (col )
53
- else :
54
- raise NotImplementedError (f"Data type { col .dtype [0 ]} not handled yet" )
33
+ result = []
34
+ for chunk in df .get_chunks ():
35
+ # We need a dict of columns here, with each column being a numpy array (at
36
+ # least for now, deal with non-numpy dtypes later).
37
+ chunk_cols = {}
38
+ for name in chunk .column_names ():
39
+ if not isinstance (name , str ):
40
+ raise ValueError (f"Column { name } is not a string" )
41
+ if name in chunk_cols :
42
+ raise ValueError (f"Column { name } is not unique" )
43
+ col = chunk .get_column_by_name (name )
44
+ if col .dtype [0 ] in (DtypeKind .INT , DtypeKind .UINT , DtypeKind .FLOAT , DtypeKind .BOOL ):
45
+ # Simple numerical or bool dtype, turn into numpy array
46
+ chunk_cols [name ], _buf = convert_column_to_ndarray (col )
47
+ elif col .dtype [0 ] == DtypeKind .CATEGORICAL :
48
+ chunk_cols [name ], _buf = convert_categorical_column (col )
49
+ elif col .dtype [0 ] == DtypeKind .STRING :
50
+ chunk_cols [name ], _buf = convert_string_column (col )
51
+ else :
52
+ raise NotImplementedError (f"Data type { col .dtype [0 ]} not handled yet" )
53
+
54
+ _buffers .append (_buf )
55
55
56
- _buffers .append (_buf )
56
+ df_new = pd .DataFrame (chunk_cols )
57
+ result .append (df_new )
57
58
58
- df_new = pd .DataFrame ( columns )
59
+ df_new = pd .concat ( result )
59
60
df_new ._buffers = _buffers
60
61
return df_new
61
62
0 commit comments