Skip to content

Commit cb3fa65

Browse files
committed
Separate buffer and column implementations
Signed-off-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
1 parent d64e5e4 commit cb3fa65

File tree

4 files changed

+583
-570
lines changed

4 files changed

+583
-570
lines changed

pandas/core/exchange/buffer.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import ctypes
from typing import Optional, Tuple

import numpy as np

from pandas.core.exchange.dataframe_protocol import (
    Buffer,
    DlpackDeviceType,
    DtypeKind,
)
5+
6+
7+
# Lookup table from dtype kind to a {bit width: NumPy scalar type} mapping.
_NP_DTYPES = {
    DtypeKind.INT: dict(
        zip((8, 16, 32, 64), (np.int8, np.int16, np.int32, np.int64))
    ),
    DtypeKind.UINT: dict(
        zip((8, 16, 32, 64), (np.uint8, np.uint16, np.uint32, np.uint64))
    ),
    DtypeKind.FLOAT: dict(zip((32, 64), (np.float32, np.float64))),
    DtypeKind.BOOL: dict(zip((8,), (bool,))),
}
13+
14+
15+
class PandasBuffer(Buffer):
    """
    Buffer backed by the memory of a NumPy array.

    Data in the buffer is guaranteed to be contiguous in memory.
    """

    def __init__(self, x: np.ndarray, allow_copy: bool = True) -> None:
        """
        Handle only regular columns (= numpy arrays) for now.

        Parameters
        ----------
        x : np.ndarray
            Array whose memory is exposed through this buffer.
        allow_copy : bool, default True
            Whether a copy of ``x`` may be made when its strides differ
            from ``(itemsize,)``.

        Raises
        ------
        RuntimeError
            If ``x`` is strided and ``allow_copy`` is False.
        """
        if x.strides != (x.dtype.itemsize,):
            # The protocol does not support strided buffers, so a copy is
            # necessary. If that's not allowed, we need to raise an exception.
            if not allow_copy:
                raise RuntimeError(
                    "Exports cannot be zero-copy in the case "
                    "of a non-contiguous buffer"
                )
            x = x.copy()

        # Store the numpy array in which the data resides as a private
        # attribute, so we can use it to retrieve the public attributes
        self._x = x

    @property
    def bufsize(self) -> int:
        """
        Buffer size in bytes.
        """
        return self._x.size * self._x.dtype.itemsize

    @property
    def ptr(self) -> int:
        """
        Pointer to start of the buffer as an integer.
        """
        return self._x.__array_interface__["data"][0]

    def __dlpack__(self):
        """
        DLPack not implemented in NumPy yet, so leave it out here.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError("__dlpack__")

    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
        """
        Device type and device ID for where the data in the buffer resides.

        The device ID is reported as ``None``, hence the ``Optional[int]``
        in the return annotation.
        """
        return (DlpackDeviceType.CPU, None)

    def __repr__(self) -> str:
        """
        Return a summary with the buffer size, pointer and device name.
        """
        summary = {
            "bufsize": self.bufsize,
            "ptr": self.ptr,
            "device": self.__dlpack_device__()[0].name,
        }
        return f"PandasBuffer({summary})"
77+
78+
79+
def buffer_to_ndarray(_buffer: Buffer, _dtype) -> np.ndarray:
    """
    Build a NumPy array over the memory described by ``_buffer``.

    Parameters
    ----------
    _buffer : Buffer
        Source buffer; its ``ptr`` and ``bufsize`` attributes are read.
    _dtype : sequence
        Dtype description whose first item is the dtype kind and whose
        second item is the bit width.

    Returns
    -------
    np.ndarray
        One-dimensional array with ``bufsize // (bitwidth // 8)`` elements.
        For a non-empty buffer the array does not own its memory.

    Raises
    ------
    RuntimeError
        If the kind or the bit width has no entry in ``_NP_DTYPES``.
    """
    # Handle the dtype
    kind = _dtype[0]
    bitwidth = _dtype[1]
    if kind not in _NP_DTYPES:
        raise RuntimeError(f"Unsupported data type: {kind}")

    # Report an unknown bit width the same way as an unknown kind instead
    # of leaking a bare KeyError from the nested lookup.
    column_dtype = _NP_DTYPES[kind].get(bitwidth)
    if column_dtype is None:
        raise RuntimeError(
            f"Unsupported bit width for data type {kind}: {bitwidth}"
        )

    length = _buffer.bufsize // (bitwidth // 8)
    if length == 0:
        # An empty buffer may carry a NULL pointer, which ctypes refuses to
        # dereference; there is no data to view, so return a fresh array.
        return np.empty(0, dtype=column_dtype)

    # No DLPack yet, so need to construct a new ndarray from the data pointer
    # and size in the buffer plus the dtype on the column
    ctypes_type = np.ctypeslib.as_ctypes_type(column_dtype)
    data_pointer = ctypes.cast(_buffer.ptr, ctypes.POINTER(ctypes_type))

    # NOTE: the returned array does not own its memory, so the caller of this
    #       function must either make a copy or hold on to a reference of the
    #       column or buffer! (not done yet, this is pretty awful ...)
    return np.ctypeslib.as_array(data_pointer, shape=(length,))

0 commit comments

Comments
 (0)