pandas-dev
diff --git a/‎pandas/__init__.py
Lines changed: 1 addition & 0 deletions b/‎pandas/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/core/config_init.py
Lines changed: 16 additions & 0 deletions b/‎pandas/core/config_init.py
Lines changed: 16 additions & 0 deletions
diff --git a/‎pandas/io/api.py
Lines changed: 1 addition & 0 deletions b/‎pandas/io/api.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/io/orc.py
Lines changed: 126 additions & 0 deletions b/‎pandas/io/orc.py
Lines changed: 126 additions & 0 deletions
diff --git a/‎pandas/tests/io/data/orc/TestOrcFile.decimal.orc
16 KB b/‎pandas/tests/io/data/orc/TestOrcFile.decimal.orc
16 KB
diff --git a/‎pandas/tests/io/data/orc/TestOrcFile.emptyFile.orc
523 Bytes b/‎pandas/tests/io/data/orc/TestOrcFile.emptyFile.orc
523 Bytes
diff --git a/‎pandas/tests/io/data/orc/TestOrcFile.test1.orc
1.67 KB b/‎pandas/tests/io/data/orc/TestOrcFile.test1.orc
1.67 KB
diff --git a/‎pandas/tests/io/data/orc/TestOrcFile.testDate1900.orc
30.2 KB b/‎pandas/tests/io/data/orc/TestOrcFile.testDate1900.orc
30.2 KB
diff --git a/‎pandas/tests/io/data/orc/TestOrcFile.testDate2038.orc
93.5 KB b/‎pandas/tests/io/data/orc/TestOrcFile.testDate2038.orc
93.5 KB
diff --git a/‎pandas/tests/io/data/orc/TestOrcFile.testSnappy.orc
123 KB b/‎pandas/tests/io/data/orc/TestOrcFile.testSnappy.orc
123 KB
@@ -165,6 +165,7 @@
     # misc
     read_clipboard,
     read_parquet,
+    read_orc,
     read_feather,
     read_gbq,
     read_html,
 
@@ -568,6 +568,22 @@ def use_inf_as_na_cb(key):
         validator=is_one_of_factory(["auto", "pyarrow", "fastparquet"]),
     )
 
+
+# Set up the io.orc specific configuration.
+# Registers the "io.orc.engine" option with a default of "auto"; the
+# validator below rejects any value other than "auto" or "pyarrow".
+orc_engine_doc = """
+: string
+    The default orc reader/writer engine. Available options:
+    'auto', 'pyarrow', the default is 'auto'
+"""
+
+with cf.config_prefix("io.orc"):
+    cf.register_option(
+        "engine",
+        "auto",
+        orc_engine_doc,
+        validator=is_one_of_factory(["auto", "pyarrow"]),
+    )
+
 # --------
 # Plotting
 # ---------
 
@@ -10,6 +10,7 @@
 from pandas.io.gbq import read_gbq
 from pandas.io.html import read_html
 from pandas.io.json import read_json
+from pandas.io.orc import read_orc
 from pandas.io.packers import read_msgpack, to_msgpack
 from pandas.io.parquet import read_parquet
 from pandas.io.parsers import read_csv, read_fwf, read_table
 
@@ -0,0 +1,126 @@
+""" orc compat """
+
+from warnings import catch_warnings
+
+from pandas.compat._optional import import_optional_dependency
+from pandas.errors import AbstractMethodError
+
+from pandas import DataFrame, get_option
+
+from pandas.io.common import get_filepath_or_buffer, is_gcs_url, is_s3_url
+
+
+def get_engine(engine):
+    """
+    Return the ORC engine implementation selected by ``engine``.
+
+    Parameters
+    ----------
+    engine : str
+        ``"auto"`` or ``"pyarrow"``. ``"auto"`` defers to the
+        ``io.orc.engine`` option; if that is also ``"auto"``, pyarrow is
+        tried.
+
+    Returns
+    -------
+    PyArrowImpl
+
+    Raises
+    ------
+    ImportError
+        If the pyarrow engine cannot be constructed because an import fails.
+    ValueError
+        If ``engine`` is neither ``"auto"`` nor ``"pyarrow"``.
+    """
+    if engine == "auto":
+        engine = get_option("io.orc.engine")
+
+    if engine == "auto":
+        # try engines in this order
+        try:
+            return PyArrowImpl()
+        except ImportError as err:
+            # Chain the underlying failure so its details (which import
+            # actually failed, and why) are not hidden behind the generic
+            # message below.
+            raise ImportError(
+                "Unable to find a usable engine; "
+                "tried using: 'pyarrow'.\n"
+                "pyarrow is required for orc "
+                "support"
+            ) from err
+
+    if engine != "pyarrow":
+        raise ValueError("engine must be 'pyarrow'")
+
+    return PyArrowImpl()
+
+
+class BaseImpl:
+    """Shared interface and DataFrame validation for ORC engines."""
+
+    api = None  # module
+
+    @staticmethod
+    def validate_dataframe(df):
+        """
+        Check that ``df`` is a DataFrame with string column and index names.
+
+        Raises
+        ------
+        ValueError
+            If ``df`` is not a DataFrame, if its column labels are not
+            inferred as strings (or empty), or if a named index level has a
+            non-string name.
+        """
+        if not isinstance(df, DataFrame):
+            raise ValueError("to_orc only supports IO with DataFrames")
+
+        # column labels have to be strings (an empty axis is accepted too)
+        column_kind = df.columns.inferred_type
+        if column_kind not in {"string", "unicode", "empty"}:
+            raise ValueError("ORC must have string column names")
+
+        # unnamed index levels (None) are fine; named ones must be strings
+        for level_name in df.index.names:
+            if level_name is not None and not isinstance(level_name, str):
+                raise ValueError("Index level names must be strings")
+
+    def write(self, df, path, compression, **kwargs):
+        """Write ``df`` to ``path``; concrete engines must override this."""
+        raise AbstractMethodError(self)
+
+    def read(self, path, columns=None, **kwargs):
+        """Read ORC data from ``path``; concrete engines must override this."""
+        raise AbstractMethodError(self)
+
+
+class PyArrowImpl(BaseImpl):
+    """ORC engine backed by ``pyarrow.orc``."""
+
+    def __init__(self):
+        pyarrow = import_optional_dependency(
+            "pyarrow", extra="pyarrow is required for orc support."
+        )
+        # Importing the submodule makes it reachable as ``self.api.orc``
+        # in ``read`` below.
+        import pyarrow.orc
+
+        self.api = pyarrow
+
+    def read(self, path, columns=None, **kwargs):
+        """
+        Read an ORC file into a DataFrame using pyarrow.
+
+        Parameters
+        ----------
+        path : object
+            Passed through ``get_filepath_or_buffer`` and then to
+            ``pyarrow.input_stream``.
+        columns : list, default None
+            Forwarded to ``ORCFile.read`` as ``columns``.
+        **kwargs
+            Forwarded to ``ORCFile.read``.
+
+        Returns
+        -------
+        The result of ``ORCFile.read(...).to_pandas()``.
+        """
+        path, _, _, should_close = get_filepath_or_buffer(path)
+
+        try:
+            py_file = self.api.input_stream(path)
+            orc_file = self.api.orc.ORCFile(py_file)
+            result = orc_file.read(columns=columns, **kwargs).to_pandas()
+        finally:
+            # get_filepath_or_buffer reports via should_close that it opened
+            # the returned handle itself; release it here so it is not
+            # leaked, whether or not the read succeeded.
+            if should_close:
+                path.close()
+
+        return result
+
+
+def read_orc(path, engine="auto", columns=None, **kwargs):
+    """
+    Load an ORC object from the file path, returning a DataFrame.
+
+    .. versionadded:: 1.0.0
+
+    Parameters
+    ----------
+    path : str, path object or file-like object
+        Any valid string path is acceptable. The string could be a URL. Valid
+        URL schemes include http, ftp, s3, and file. For file URLs, a host is
+        expected. A local file could be:
+        ``file://localhost/path/to/table.orc``.
+
+        If you want to pass in a path object, pandas accepts any
+        ``os.PathLike``.
+
+        By file-like object, we refer to objects with a ``read()`` method,
+        such as a file handler (e.g. via builtin ``open`` function in
+        binary mode) or ``BytesIO``.
+    engine : {'auto', 'pyarrow'}, default 'auto'
+        ORC library to use. If 'auto', then the option ``io.orc.engine`` is
+        used. The default ``io.orc.engine`` behavior is to try 'pyarrow'.
+    columns : list, default=None
+        If not None, only these columns will be read from the file.
+    **kwargs
+        Any additional kwargs are passed to the engine.
+
+    Returns
+    -------
+    DataFrame
+    """
+
+    impl = get_engine(engine)
+    return impl.read(path, columns=columns, **kwargs)