From fe849207cd177334f0d5533d163915b1ffe97eff Mon Sep 17 00:00:00 2001
From: NickFillot <40593450+NickFillot@users.noreply.github.com>
Date: Sun, 3 Oct 2021 16:23:02 +0200
Subject: [PATCH 1/3] [ENH] to_orc

pandas.io.orc.to_orc method definition
---
 pandas/io/orc.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/pandas/io/orc.py b/pandas/io/orc.py
index 6bdb4df806b5c..8900f30f6a440 100644
--- a/pandas/io/orc.py
+++ b/pandas/io/orc.py
@@ -1,15 +1,19 @@
 """ orc compat """
 from __future__ import annotations
 
+import os
 from typing import TYPE_CHECKING
+from tempfile import gettempdir
 
 from pandas._typing import FilePathOrBuffer
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.io.common import get_handle
 
-if TYPE_CHECKING:
-    from pandas import DataFrame
+from pandas.core import generic
+from pandas.util._decorators import doc
+
+from pandas import DataFrame
 
 
 def read_orc(
@@ -55,3 +59,78 @@ def read_orc(
     with get_handle(path, "rb", is_text=False) as handles:
         orc_file = orc.ORCFile(handles.handle)
         return orc_file.read(columns=columns, **kwargs).to_pandas()
+
+
+def to_orc(
+    df: DataFrame,
+    path: FilePathOrBuffer = None,
+    engine: str = 'pyarrow',
+    index: bool = None,
+    **kwargs
+) -> bytes:
+    """
+    Write a DataFrame to the ORC format.
+    Parameters
+    ----------
+    df : DataFrame
+    path : str or file-like object, default None
+        If a string, it is used as the path of the ORC file to write.
+        By file-like object, we refer to objects with a write() method,
+        such as a file handle (e.g. via builtin open function) or
+        io.BytesIO. If path is None, the data is written to a
+        temporary file and the content of that file is returned as a
+        bytes object.
+    engine : {'pyarrow'}, default 'pyarrow'
+        ORC library to use, or the library module itself; it must be
+        named 'pyarrow' and provide the ``orc`` submodule (min version 4.0.0).
+    index : bool, default None
+        If ``True``, include the dataframe's index(es) in the file output. If
+        ``False``, they will not be written to the file.
+        If ``None``, the index(es) are written only when the first index
+        level has a name (``df.index.names[0] is not None``); an unnamed
+        index, such as the default RangeIndex, is not written. The value
+        is passed to the engine's ``Table.from_pandas`` as
+        ``preserve_index``.
+    kwargs
+        Additional keyword arguments passed to the engine
+    Returns
+    -------
+    bytes if no path argument is provided else None
+    """
+    if index is None:
+        index = df.index.names[0] is not None
+    
+    if isinstance(engine, str):
+        engine = import_optional_dependency(engine, min_version='4.0.0')
+    else:
+        try:
+            assert engine.__name__ == 'pyarrow', "engine must be 'pyarrow' module"
+            assert hasattr(engine, 'orc'), "'pyarrow' module must have version > 4.0.0 with orc module"
+        except Exception as e:
+            raise ValueError("Wrong engine passed, %s" % (
+                e,
+            ))
+            
+    if path is None:
+        # to bytes: tmp path, pyarrow auto closes buffers
+        path = os.path.join(gettempdir(), os.urandom(12).hex())
+        try:
+            engine.orc.write_table(
+                engine.Table.from_pandas(df, preserve_index=index),
+                path, **kwargs
+            )
+            with open(path, 'rb') as path:
+                return path.read()
+        except BaseException as e:
+            raise e
+        finally:
+            try:
+                os.remove(path)
+            except Exception as e:
+                pass
+    else:
+        engine.orc.write_table(
+            engine.Table.from_pandas(df, preserve_index=index),
+            path, **kwargs
+        )
+    return

From 6cc7030cb23511aeaf803a69fde89040e9fc6ae4 Mon Sep 17 00:00:00 2001
From: NickFillot <40593450+NickFillot@users.noreply.github.com>
Date: Sun, 3 Oct 2021 16:34:37 +0200
Subject: [PATCH 2/3] pandas.DataFrame.to_orc

Add the to_orc method to pandas.DataFrame
---
 pandas/core/frame.py | 74 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index aad7213c93a1d..e52ef00c348d6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2734,7 +2734,81 @@ def to_parquet(
             storage_options=storage_options,
             **kwargs,
         )
+    
+    def to_orc(
+        self,
+        path: FilePathOrBuffer = None,
+        engine: str = 'pyarrow',
+        index: bool = None,
+        **kwargs
+    ) -> bytes:
+        """
+        Write a DataFrame to the ORC format.
+        Parameters
+        ----------
+        df : DataFrame
+        path : str or file-like object, default None
+            If a string, it is used as the path of the ORC file to write.
+            By file-like object, we refer to objects with a write() method,
+            such as a file handle (e.g. via builtin open function) or
+            io.BytesIO. If path is None, the data is written to a
+            temporary file and the content of that file is returned as a
+            bytes object.
+        engine : {'pyarrow'}, default 'pyarrow'
+            ORC library to use, or the library module itself; it must be
+            named 'pyarrow' and provide the ``orc`` submodule (min version 4.0.0).
+        index : bool, default None
+            If ``True``, include the dataframe's index(es) in the file output. If
+            ``False``, they will not be written to the file.
+            If ``None``, the index(es) are written only when the first index
+            level has a name (``index.names[0] is not None``); an unnamed
+            index, such as the default RangeIndex, is not written. The value
+            is passed to the engine's ``Table.from_pandas`` as
+            ``preserve_index``.
+        kwargs
+            Additional keyword arguments passed to the engine
+        Returns
+        -------
+        bytes if no path argument is provided else None
 
+        See Also
+        --------
+        read_orc : Read an ORC file.
+        DataFrame.to_parquet : Write a parquet file.
+        DataFrame.to_csv : Write a csv file.
+        DataFrame.to_sql : Write to a sql table.
+        DataFrame.to_hdf : Write to hdf.
+
+        Notes
+        -----
+        This function requires `pyarrow <https://arrow.apache.org/docs/python/>`_ library.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
+        >>> df.to_orc('df.orc', compression='gzip')  # doctest: +SKIP
+        >>> pd.read_orc('df.orc')  # doctest: +SKIP
+           col1  col2
+        0     1     3
+        1     2     4
+
+        If you want to get a buffer to the orc content you can write it to io.BytesIO
+        >>> import io
+        >>> b = io.BytesIO(df.to_orc())
+        >>> b.seek(0)
+        0
+        >>> content = b.read()
+        """
+        from pandas.io.orc import to_orc
+
+        return to_orc(
+            self,
+            path,
+            engine,
+            index=index,
+            **kwargs
+        )
+    
     @Substitution(
         header_type="bool",
         header="Whether to print column labels, default True",

From 2d1515eb1ca49eb08539add28db2467a75911905 Mon Sep 17 00:00:00 2001
From: NickFillot <40593450+NickFillot@users.noreply.github.com>
Date: Sun, 3 Oct 2021 16:47:11 +0200
Subject: [PATCH 3/3] Cleaning

---
 pandas/io/orc.py | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/pandas/io/orc.py b/pandas/io/orc.py
index 8900f30f6a440..d444d38aa2486 100644
--- a/pandas/io/orc.py
+++ b/pandas/io/orc.py
@@ -2,6 +2,8 @@
 from __future__ import annotations
 
 import os
+import pandas._testing as tm
+
 from typing import TYPE_CHECKING
 from tempfile import gettempdir
 
@@ -10,10 +12,8 @@
 
 from pandas.io.common import get_handle
 
-from pandas.core import generic
-from pandas.util._decorators import doc
-
-from pandas import DataFrame
+if TYPE_CHECKING:
+    from pandas import DataFrame
 
 
 def read_orc(
@@ -105,29 +105,19 @@ def to_orc(
     else:
         try:
             assert engine.__name__ == 'pyarrow', "engine must be 'pyarrow' module"
-            assert hasattr(engine, 'orc'), "'pyarrow' module must have version > 4.0.0 with orc module"
+            assert hasattr(engine, 'orc'), "'pyarrow' module must have orc module"
         except Exception as e:
-            raise ValueError("Wrong engine passed, %s" % (
-                e,
-            ))
+            raise ValueError("Wrong engine passed, %s" % e)
             
     if path is None:
         # to bytes: tmp path, pyarrow auto closes buffers
-        path = os.path.join(gettempdir(), os.urandom(12).hex())
-        try:
+        with tm.ensure_clean(os.path.join(gettempdir(), os.urandom(12).hex())) as path:
             engine.orc.write_table(
                 engine.Table.from_pandas(df, preserve_index=index),
                 path, **kwargs
             )
             with open(path, 'rb') as path:
                 return path.read()
-        except BaseException as e:
-            raise e
-        finally:
-            try:
-                os.remove(path)
-            except Exception as e:
-                pass
     else:
         engine.orc.write_table(
             engine.Table.from_pandas(df, preserve_index=index),