From df24499fe29d40d47a08f5ee2e132a83eb13660a Mon Sep 17 00:00:00 2001
From: iskode <ismael.from.kone@gmail.com>
Date: Fri, 29 Oct 2021 08:41:23 +0000
Subject: [PATCH 1/3] add module level constant +auxiliary function

---
 protocol/pandas_implementation.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py
index a016d25c..87af856f 100644
--- a/protocol/pandas_implementation.py
+++ b/protocol/pandas_implementation.py
@@ -97,6 +97,10 @@ class _DtypeKind(enum.IntEnum):
     DATETIME = 22
     CATEGORICAL = 23
 
+_INTS = {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}
+_UNITS = {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64}
+_FLOATS = {32: np.float32, 64: np.float64}
+_NP_DTYPES = {0: _INTS, 1: _UNITS, 2: _FLOATS, 20: {8: bool}}
 
 def convert_column_to_ndarray(col : ColumnObject) -> np.ndarray:
     """
@@ -114,18 +118,8 @@ def convert_column_to_ndarray(col : ColumnObject) -> np.ndarray:
 
 
 def buffer_to_ndarray(_buffer, _dtype) -> np.ndarray:
-    # Handle the dtype
-    kind = _dtype[0]
     bitwidth = _dtype[1]
-    _k = _DtypeKind
-    if _dtype[0] not in (_k.INT, _k.UINT, _k.FLOAT, _k.BOOL):
-        raise RuntimeError("Not a boolean, integer or floating-point dtype")
-
-    _ints = {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}
-    _uints = {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64}
-    _floats = {32: np.float32, 64: np.float64}
-    _np_dtypes = {0: _ints, 1: _uints, 2: _floats, 20: {8: bool}}
-    column_dtype = _np_dtypes[kind][bitwidth]
+    column_dtype = protocol_dtype_to_np_dtype(_dtype)
 
     # No DLPack yet, so need to construct a new ndarray from the data pointer
     # and size in the buffer plus the dtype on the column
@@ -140,6 +134,14 @@ def buffer_to_ndarray(_buffer, _dtype) -> np.ndarray:
 
     return x
 
+def protocol_dtype_to_np_dtype(_dtype):
+    kind = _dtype[0]
+    bitwidth = _dtype[1]
+    _k = _DtypeKind
+    if _dtype[0] not in (_k.INT, _k.UINT, _k.FLOAT, _k.BOOL):
+        raise RuntimeError("Not a boolean, integer or floating-point dtype")
+   
+    return _NP_DTYPES[kind][bitwidth]
 
 def convert_categorical_column(col : ColumnObject) -> pd.Series:
     """

From 5530e8c66a9027f6dfad813d9ed6e3310bbe9948 Mon Sep 17 00:00:00 2001
From: iskode <ismael.from.kone@gmail.com>
Date: Fri, 29 Oct 2021 08:47:35 +0000
Subject: [PATCH 2/3] add module level Device enum and the remove local one

---
 protocol/pandas_implementation.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py
index 87af856f..13521ea5 100644
--- a/protocol/pandas_implementation.py
+++ b/protocol/pandas_implementation.py
@@ -97,6 +97,16 @@ class _DtypeKind(enum.IntEnum):
     DATETIME = 22
     CATEGORICAL = 23
 
+class _Device(enum.IntEnum):
+    CPU = 1
+    CUDA = 2
+    CPU_PINNED = 3
+    OPENCL = 4
+    VULKAN = 7
+    METAL = 8
+    VPI = 9
+    ROCM = 10
+
 _INTS = {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}
 _UNITS = {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64}
 _FLOATS = {32: np.float32, 64: np.float64}
@@ -311,10 +321,7 @@ def __dlpack_device__(self) -> Tuple[enum.IntEnum, int]:
         """
         Device type and device ID for where the data in the buffer resides.
         """
-        class Device(enum.IntEnum):
-            CPU = 1
-
-        return (Device.CPU, None)
+        return (_Device.CPU, None)
 
     def __repr__(self) -> str:
         return 'PandasBuffer(' + str({'bufsize': self.bufsize,

From ffb9ae8176e4b6f229b2446e90f69ef1ab9ee79e Mon Sep 17 00:00:00 2001
From: iskode <ismael.from.kone@gmail.com>
Date: Fri, 29 Oct 2021 09:29:48 +0000
Subject: [PATCH 3/3] harmonize buffer returned by 'convert_*' methods as
 'convert_string_colum' was different.

---
 protocol/pandas_implementation.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py
index 13521ea5..b3d8943e 100644
--- a/protocol/pandas_implementation.py
+++ b/protocol/pandas_implementation.py
@@ -122,9 +122,12 @@ def convert_column_to_ndarray(col : ColumnObject) -> np.ndarray:
     if col.describe_null[0] not in (0, 1):
         raise NotImplementedError("Null values represented as masks or "
                                   "sentinel values not handled yet")
-
-    _buffer, _dtype = col.get_buffers()["data"]
-    return buffer_to_ndarray(_buffer, _dtype), _buffer
+    buffers = col.get_buffers()
+    _buffer, _dtype = buffers["data"]
+    # there is a strange side effect (failing unit test) when replacing below
+    # `buffers` by `col.get_buffers()`. It is like the buffer has changed between
+    # the `buffer_to_ndarray` call and `col.get_buffers()`
+    return buffer_to_ndarray(_buffer, _dtype), buffers
 
 
 def buffer_to_ndarray(_buffer, _dtype) -> np.ndarray:
@@ -165,7 +168,8 @@ def convert_categorical_column(col : ColumnObject) -> pd.Series:
     #    categories = col._col.values.categories.values
     #    codes = col._col.values.codes
     categories = np.asarray(list(mapping.values()))
-    codes_buffer, codes_dtype = col.get_buffers()["data"]
+    buffers = col.get_buffers()
+    codes_buffer, codes_dtype = buffers["data"]
     codes = buffer_to_ndarray(codes_buffer, codes_dtype)
     values = categories[codes]
 
@@ -181,7 +185,7 @@ def convert_categorical_column(col : ColumnObject) -> pd.Series:
         raise NotImplementedError("Only categorical columns with sentinel "
                                   "value supported at the moment")
 
-    return series, codes_buffer
+    return series, buffers
 
 
 def convert_string_column(col : ColumnObject) -> np.ndarray: