Skip to content

Commit 4adcd6b

Browse files
committed
Start implementing chunk support in from_df
Signed-off-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
1 parent 0dbdb0c commit 4adcd6b

File tree

1 file changed

+25
-24
lines changed

1 file changed

+25
-24
lines changed

pandas/api/exchange/implementation.py

Lines changed: 25 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -29,33 +29,34 @@ def _from_dataframe(df : DataFrameXchg) -> pd.DataFrame:
2929
only Pandas. Later, we need to implement/test support for categoricals,
3030
bit/byte masks, chunk handling, etc.
3131
"""
32-
# Check number of chunks, if there's more than one we need to iterate
33-
if df.num_chunks() > 1:
34-
raise NotImplementedError
35-
36-
# We need a dict of columns here, with each column being a numpy array (at
37-
# least for now, deal with non-numpy dtypes later).
38-
columns = dict()
3932
_buffers = [] # hold on to buffers, keeps memory alive
40-
for name in df.column_names():
41-
if not isinstance(name, str):
42-
raise ValueError(f"Column {name} is not a string")
43-
if name in columns:
44-
raise ValueError(f"Column {name} is not unique")
45-
col = df.get_column_by_name(name)
46-
if col.dtype[0] in (DtypeKind.INT, DtypeKind.UINT, DtypeKind.FLOAT, DtypeKind.BOOL):
47-
# Simple numerical or bool dtype, turn into numpy array
48-
columns[name], _buf = convert_column_to_ndarray(col)
49-
elif col.dtype[0] == DtypeKind.CATEGORICAL:
50-
columns[name], _buf = convert_categorical_column(col)
51-
elif col.dtype[0] == DtypeKind.STRING:
52-
columns[name], _buf = convert_string_column(col)
53-
else:
54-
raise NotImplementedError(f"Data type {col.dtype[0]} not handled yet")
33+
result = []
34+
for chunk in df.get_chunks():
35+
# We need a dict of columns here, with each column being a numpy array (at
36+
# least for now, deal with non-numpy dtypes later).
37+
chunk_cols = {}
38+
for name in chunk.column_names():
39+
if not isinstance(name, str):
40+
raise ValueError(f"Column {name} is not a string")
41+
if name in chunk_cols:
42+
raise ValueError(f"Column {name} is not unique")
43+
col = chunk.get_column_by_name(name)
44+
if col.dtype[0] in (DtypeKind.INT, DtypeKind.UINT, DtypeKind.FLOAT, DtypeKind.BOOL):
45+
# Simple numerical or bool dtype, turn into numpy array
46+
chunk_cols[name], _buf = convert_column_to_ndarray(col)
47+
elif col.dtype[0] == DtypeKind.CATEGORICAL:
48+
chunk_cols[name], _buf = convert_categorical_column(col)
49+
elif col.dtype[0] == DtypeKind.STRING:
50+
chunk_cols[name], _buf = convert_string_column(col)
51+
else:
52+
raise NotImplementedError(f"Data type {col.dtype[0]} not handled yet")
53+
54+
_buffers.append(_buf)
5555

56-
_buffers.append(_buf)
56+
df_new = pd.DataFrame(chunk_cols)
57+
result.append(df_new)
5758

58-
df_new = pd.DataFrame(columns)
59+
df_new = pd.concat(result)
5960
df_new._buffers = _buffers
6061
return df_new
6162

0 commit comments

Comments
 (0)