From e429a5063b465b228405ecd6e0c08af5d6ff3aaf Mon Sep 17 00:00:00 2001
From: Alenka Frim <frim.alenka@gmail.com>
Date: Tue, 7 Sep 2021 11:35:52 +0200
Subject: [PATCH 1/4] Added _format_str: mapping NumPy to Arrow format strings

---
 protocol/pandas_implementation.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py
index a016d25c..e0ef2590 100644
--- a/protocol/pandas_implementation.py
+++ b/protocol/pandas_implementation.py
@@ -435,10 +435,32 @@ def _dtype_from_pandasdtype(self, dtype) -> Tuple[enum.IntEnum, int, str, str]:
             raise NotImplementedError(f"Data type {dtype} not handled yet")
 
         bitwidth = dtype.itemsize * 8
-        format_str = dtype.str
+        format_str = self._format_str(dtype.str)
         endianness = dtype.byteorder if not kind == _k.CATEGORICAL else '='
         return (kind, bitwidth, format_str, endianness)
 
+    def _format_str(self, format_str) -> str:
+        """
+        Mapping of NumPy formt strings to
+        Apache Arrow C Data Interface format strings.
+        'O' categorical mapped as 'U': large utf-8 string for now
+        """
+        # One table per NumPy dtype kind: bit width -> Arrow format character.
+        _ints = {8: 'c', 16: 's', 32: 'i', 64: 'l'}
+        _uints = {8: 'C', 16: 'S', 32: 'I', 64: 'L'}
+        _floats = {16: 'e', 32: 'f', 64: 'g'}
+        _np_dtypes = {'i': _ints, 'u': _uints, 'f': _floats, 'b': {8: 'b'}, 'O': {64: 'U'}}
+        dt = np.dtype(format_str)
+        if dt.byteorder == '>':
+            raise ValueError("Big-endian not supported by exchange "
+                             "protocol")
+
+        # An unknown kind or bit width yields None here instead of a KeyError.
+        arrow_format_str = _np_dtypes.get(dt.kind, {}).get(dt.itemsize * 8)
+        if arrow_format_str is None:
+            raise NotImplementedError(f"Format string {format_str} not handled yet")
+
+        return arrow_format_str
 
     @property
     def describe_categorical(self) -> Dict[str, Any]:

From 1dd21bbb2e0fb595459eb25feaa4a51088904bd9 Mon Sep 17 00:00:00 2001
From: Alenka Frim <frim.alenka@gmail.com>
Date: Tue, 7 Sep 2021 11:58:30 +0200
Subject: [PATCH 2/4] Added test for _format_str

---
 protocol/pandas_implementation.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py
index e0ef2590..a455bc51 100644
--- a/protocol/pandas_implementation.py
+++ b/protocol/pandas_implementation.py
@@ -902,6 +902,12 @@ def test_metadata():
     assert_dataframe_equal(df.__dataframe__(), df)
     tm.assert_frame_equal(df, df2)
 
+def test_fromat_str():
+    df = pd.DataFrame(data=dict(a=[1, 2, 3], B=[3, 4, 5],
+                                c=[1.5, 2.5, 3.5], D=["a", "b", "cdef"]))
+    df["B"] = df["B"].astype("category")
+    df["D"] = df["D"].astype("object")
+    df2 = from_dataframe(df)
 
 if __name__ == '__main__':
     test_categorical_dtype()

From 8f43391d4c81aeddc5be31561c70473abb9df4ca Mon Sep 17 00:00:00 2001
From: Athan <kgryte@gmail.com>
Date: Tue, 7 Sep 2021 10:15:00 -0700
Subject: [PATCH 3/4] Fix typo

---
 protocol/pandas_implementation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py
index a455bc51..bb73e5ca 100644
--- a/protocol/pandas_implementation.py
+++ b/protocol/pandas_implementation.py
@@ -441,7 +441,7 @@ def _dtype_from_pandasdtype(self, dtype) -> Tuple[enum.IntEnum, int, str, str]:
 
     def _format_str(self, format_str) -> str:
         """
-        Mapping of NumPy formt strings to
+        Mapping of NumPy format strings to
         Apache Arrow C Data Interface format strings.
         'O' categorical mapped as 'U': large utf-8 string for now
         """

From 82066208be8943fa4292d6bf7400d1dbb0c58744 Mon Sep 17 00:00:00 2001
From: Alenka Frim <frim.alenka@gmail.com>
Date: Wed, 8 Sep 2021 10:15:14 +0200
Subject: [PATCH 4/4] Fix incomplete test

---
 protocol/pandas_implementation.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py
index a455bc51..a4239326 100644
--- a/protocol/pandas_implementation.py
+++ b/protocol/pandas_implementation.py
@@ -907,7 +907,11 @@ def test_fromat_str():
                                 c=[1.5, 2.5, 3.5], D=["a", "b", "cdef"]))
     df["B"] = df["B"].astype("category")
     df["D"] = df["D"].astype("object")
-    df2 = from_dataframe(df)
+    
+    format_strings = {'a': 'l', 'B': 'U', 'c': 'g', 'D': 'u'}
+    for col in df.columns.tolist():
+        column = _PandasColumn(df[col])
+        assert column.dtype[2] == format_strings[col]
 
 if __name__ == '__main__':
     test_categorical_dtype()