From 6aa72ca2b5200a948bb09be0314f823b3975cde3 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Tue, 13 Dec 2022 02:13:57 -0500 Subject: [PATCH 01/16] =?UTF-8?q?ENH:=20Add=20engine=20keyword=20to=20read?= =?UTF-8?q?=5Fjson=20to=20enable=20reading=20from=20pyarrow=C2=A0#48893?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/_typing.py | 3 + pandas/io/json/_json.py | 83 ++++++++++-- pandas/io/json/arrow_json_parser_wrapper.py | 35 +++++ pandas/tests/io/json/conftest.py | 24 ++++ pandas/tests/io/json/test_readlines.py | 136 +++++++++++++++----- 5 files changed, 234 insertions(+), 47 deletions(-) create mode 100644 pandas/io/json/arrow_json_parser_wrapper.py diff --git a/pandas/_typing.py b/pandas/_typing.py index 8d3044a978291..87979aba9ada4 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -324,6 +324,9 @@ def closed(self) -> bool: # read_csv engines CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] +# read_json engines +JSONEngine = Literal["ujson", "pyarrow"] + # read_xml parsers XMLParsers = Literal["lxml", "etree"] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index aa1342d0f135f..fe87cb5aea2c2 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -31,6 +31,7 @@ DtypeArg, FilePath, IndexLabel, + JSONEngine, JSONSerializable, ReadBuffer, StorageOptions, @@ -72,6 +73,7 @@ build_table_schema, parse_table_schema, ) +from pandas.io.json.arrow_json_parser_wrapper import ArrowJsonParserWrapper from pandas.io.parsers.readers import validate_integer if TYPE_CHECKING: @@ -392,6 +394,7 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., + engine: JSONEngine = ..., lines: bool = ..., chunksize: int, compression: CompressionOptions = ..., @@ -421,6 +424,7 @@ def read_json( compression: CompressionOptions = ..., nrows: int | None = ..., storage_options: StorageOptions = ..., + engine: JSONEngine = ..., use_nullable_dtypes: bool = ..., ) -> JsonReader[Literal["series"]]: ... @@ -446,6 +450,7 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., use_nullable_dtypes: bool = ..., + engine: JSONEngine = ..., ) -> Series: ... @@ -470,6 +475,7 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., use_nullable_dtypes: bool = ..., + engine: JSONEngine = ..., ) -> DataFrame: ... @@ -497,6 +503,7 @@ def read_json( nrows: int | None = None, storage_options: StorageOptions = None, use_nullable_dtypes: bool = False, + engine: JSONEngine = "ujson", ) -> DataFrame | Series | JsonReader: """ Convert a JSON string to pandas object. @@ -605,6 +612,9 @@ def read_json( .. versionadded:: 1.3.0 + engine : {{'ujson', 'pyarrow'}}, default "ujson" + Parser engine to use. + lines : bool, default False Read the file as a json object per line. 
@@ -760,6 +770,7 @@ def read_json( storage_options=storage_options, encoding_errors=encoding_errors, use_nullable_dtypes=use_nullable_dtypes, + engine=engine, ) if chunksize: @@ -796,6 +807,7 @@ def __init__( storage_options: StorageOptions = None, encoding_errors: str | None = "strict", use_nullable_dtypes: bool = False, + engine: JSONEngine = "ujson", ) -> None: self.orient = orient @@ -807,6 +819,7 @@ def __init__( self.precise_float = precise_float self.date_unit = date_unit self.encoding = encoding + self.engine = engine self.compression = compression self.storage_options = storage_options self.lines = lines @@ -825,9 +838,48 @@ def __init__( self.nrows = validate_integer("nrows", self.nrows, 0) if not self.lines: raise ValueError("nrows can only be passed if lines=True") + if self.engine == "pyarrow": + if not self.lines: + raise ValueError( + "currently pyarrow engine only supports " + "the line-delimited JSON format" + ) + if self.engine not in ["pyarrow", "ujson"]: + raise ValueError( + f"The engine type {self.engine} is currently not supported." + ) + + if self.engine == "pyarrow": + self._engine = self._make_engine(filepath_or_buffer) + if self.engine == "ujson": + data = self._get_data_from_filepath(filepath_or_buffer) + self.data = self._preprocess_data(data) + + def _make_engine( + self, + filepath_or_buffer: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + ) -> ArrowJsonParserWrapper: + + if not isinstance(filepath_or_buffer, list): + is_text = False + mode = "rb" + self.handles = get_handle( + self._get_data_from_filepath(filepath_or_buffer), + mode=mode, + encoding=self.encoding, + is_text=is_text, + compression=self.compression, + storage_options=self.storage_options, + errors=self.encoding_errors, + ) + filepath_or_buffer = self.handles.handle - data = self._get_data_from_filepath(filepath_or_buffer) - self.data = self._preprocess_data(data) + try: + return ArrowJsonParserWrapper(filepath_or_buffer) + except Exception: + if self.handles is not None: + self.handles.close() + raise def _preprocess_data(self, data): """ @@ -912,19 +964,22 @@ def read(self) -> DataFrame | Series: """ obj: DataFrame | Series with self: - if self.lines: - if self.chunksize: - obj = concat(self) - elif self.nrows: - lines = list(islice(self.data, self.nrows)) - lines_json = self._combine_lines(lines) - obj = self._get_object_parser(lines_json) + if self.engine == "pyarrow": + obj = self._engine.read() + if self.engine == "ujson": + if self.lines: + if self.chunksize: + obj = concat(self) + elif self.nrows: + lines = list(islice(self.data, self.nrows)) + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + else: + data = ensure_str(self.data) + data_lines = data.split("\n") + obj = self._get_object_parser(self._combine_lines(data_lines)) else: - data = ensure_str(self.data) - data_lines = data.split("\n") - obj = self._get_object_parser(self._combine_lines(data_lines)) - else: - obj = self._get_object_parser(self.data) + obj = self._get_object_parser(self.data) if self.use_nullable_dtypes: return obj.convert_dtypes(infer_objects=False) else: diff --git a/pandas/io/json/arrow_json_parser_wrapper.py b/pandas/io/json/arrow_json_parser_wrapper.py new file mode 100644 index 0000000000000..2980ed2f2772e --- /dev/null +++ b/pandas/io/json/arrow_json_parser_wrapper.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pandas._typing import ReadBuffer +from pandas.compat._optional import import_optional_dependency + +if 
TYPE_CHECKING: + from pandas import DataFrame + + +class ArrowJsonParserWrapper: + """ + Wrapper for the pyarrow engine for read_json() + """ + + def __init__(self, src: ReadBuffer[bytes]) -> None: + self.src = src + + def read(self) -> DataFrame: + """ + Reads the contents of a JSON file into a DataFrame and + processes it according to the kwargs passed in the + constructor. + + Returns + ------- + DataFrame + The DataFrame created from the JSON file. + """ + pyarrow_json = import_optional_dependency("pyarrow.json") + table = pyarrow_json.read_json(self.src) + + frame = table.to_pandas() + return frame diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index 4e848cd48b42d..b600064d08a46 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -7,3 +7,27 @@ def orient(request): Fixture for orients excluding the table format. """ return request.param + + +@pytest.fixture +def json_dir_path(datapath): + """ + The directory path to the data files needed for parser tests. + """ + return datapath("io", "json", "data") + + +@pytest.fixture(params=["ujson", "pyarrow"]) +def engine(request): + return request.param + + +@pytest.fixture +def json_engine_pyarrow_xfail(request): + """ + Fixture that xfails a test if the engine is pyarrow. + """ + engine = request.getfixturevalue("engine") + if engine == "pyarrow": + mark = pytest.mark.xfail(reason="pyarrow doesn't support this.") + request.node.add_marker(mark) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index a76627fb08147..9dab7c54d9a72 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -1,4 +1,5 @@ from io import StringIO +import os from pathlib import Path from typing import Iterator @@ -13,6 +14,8 @@ from pandas.io.json._json import JsonReader +xfail_json_engine_pyarrow = pytest.mark.usefixtures("json_engine_pyarrow_xfail") + @pytest.fixture def lines_json_df(): @@ -20,21 +23,37 @@ def lines_json_df(): return df.to_json(lines=True, orient="records") -def test_read_jsonl(): +# pyarrow takes file path as an input +# and only supports line delimited json +@xfail_json_engine_pyarrow +def test_read_jsonl(engine): # GH9180 - result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True, engine=engine) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) -def test_read_datetime(): +def test_read_jsonl_engine_pyarrow(json_dir_path, engine): + result = read_json( + os.path.join(json_dir_path, "line_delimited.json"), + lines=True, + engine=engine, + ) + expected = DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]}) + tm.assert_frame_equal(result, expected) + + +# pyarrow takes file path as an input +# and only supports line delimited json +@xfail_json_engine_pyarrow +def test_read_datetime(engine): # GH33787 df = DataFrame( [([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")], columns=["accounts", "date", "name"], ) json_line = df.to_json(lines=True, orient="records") - result = read_json(json_line) + result = read_json(json_line, engine=engine) expected = DataFrame( [[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]], columns=["accounts", "date", "name"], @@ -42,20 +61,23 @@ def test_read_datetime(): tm.assert_frame_equal(result, expected) -def test_read_jsonl_unicode_chars(): +# pyarrow takes file path as an input +# and only supports line delimited 
json +@xfail_json_engine_pyarrow +def test_read_jsonl_unicode_chars(engine): # GH15132: non-ascii unicode characters # \u201d == RIGHT DOUBLE QUOTATION MARK # simulate file handle json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' json = StringIO(json) - result = read_json(json, lines=True) + result = read_json(json, lines=True, engine=engine) expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) # simulate string json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - result = read_json(json, lines=True) + result = read_json(json, lines=True, engine=engine) expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -89,56 +111,80 @@ def test_to_jsonl_count_new_lines(): assert actual_new_lines_count == expected_new_lines_count +# pyarrow takes file path as an input +# and only supports line delimited json +# and doesn't have a chunksize argument @pytest.mark.parametrize("chunksize", [1, 1.0]) -def test_readjson_chunks(lines_json_df, chunksize): +@xfail_json_engine_pyarrow +def test_readjson_chunks(lines_json_df, chunksize, engine): # Basic test that read_json(chunks=True) gives the same result as # read_json(chunks=False) # GH17048: memory usage when lines=True unchunked = read_json(StringIO(lines_json_df), lines=True) - with read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) as reader: + with read_json( + StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine + ) as reader: chunked = pd.concat(reader) tm.assert_frame_equal(chunked, unchunked) -def test_readjson_chunksize_requires_lines(lines_json_df): +def test_readjson_chunksize_requires_lines(lines_json_df, engine): msg = "chunksize can only be passed if lines=True" with pytest.raises(ValueError, match=msg): - with read_json(StringIO(lines_json_df), lines=False, chunksize=2) as _: + with read_json( + StringIO(lines_json_df), lines=False, chunksize=2, engine=engine + ) as _: pass -def test_readjson_chunks_series(): +# pyarrow takes file path as an input +# and only supports line delimited json +# and doesn't have a chunksize argument +@xfail_json_engine_pyarrow +def test_readjson_chunks_series(engine): # Test reading line-format JSON to Series with chunksize param s = pd.Series({"A": 1, "B": 2}) strio = StringIO(s.to_json(lines=True, orient="records")) - unchunked = read_json(strio, lines=True, typ="Series") + unchunked = read_json(strio, lines=True, typ="Series", engine=engine) strio = StringIO(s.to_json(lines=True, orient="records")) - with read_json(strio, lines=True, typ="Series", chunksize=1) as reader: + with read_json( + strio, lines=True, typ="Series", chunksize=1, engine=engine + ) as reader: chunked = pd.concat(reader) tm.assert_series_equal(chunked, unchunked) -def test_readjson_each_chunk(lines_json_df): +# pyarrow takes file path as an input +# and only supports line delimited json +# and doesn't have a chunksize argument +@xfail_json_engine_pyarrow +def test_readjson_each_chunk(lines_json_df, engine): # Other tests check that the final result of read_json(chunksize=True) # is correct. This checks the intermediate chunks. 
- with read_json(StringIO(lines_json_df), lines=True, chunksize=2) as reader: + with read_json( + StringIO(lines_json_df), lines=True, chunksize=2, engine=engine + ) as reader: chunks = list(reader) assert chunks[0].shape == (2, 2) assert chunks[1].shape == (1, 2) -def test_readjson_chunks_from_file(): +# pyarrow takes file path as an input +# and only supports line delimited json +# and doesn't have a chunksize argument +@xfail_json_engine_pyarrow +def test_readjson_chunks_from_file(engine): with tm.ensure_clean("test.json") as path: df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) df.to_json(path, lines=True, orient="records") - with read_json(path, lines=True, chunksize=1) as reader: + with read_json(path, lines=True, chunksize=1, engine=engine) as reader: chunked = pd.concat(reader) - unchunked = read_json(path, lines=True) + unchunked = read_json(path, lines=True, engine=engine) tm.assert_frame_equal(unchunked, chunked) @@ -171,16 +217,22 @@ def test_readjson_chunks_closes(chunksize): @pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"]) -def test_readjson_invalid_chunksize(lines_json_df, chunksize): +def test_readjson_invalid_chunksize(lines_json_df, chunksize, engine): msg = r"'chunksize' must be an integer >=1" with pytest.raises(ValueError, match=msg): - with read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) as _: + with read_json( + StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine + ) as _: pass @pytest.mark.parametrize("chunksize", [None, 1, 2]) -def test_readjson_chunks_multiple_empty_lines(chunksize): +# pyarrow takes file path as an input +# and only supports line delimited json +# and doesn't have a chunksize argument +@xfail_json_engine_pyarrow +def test_readjson_chunks_multiple_empty_lines(chunksize, engine): j = """ {"A":1,"B":4} @@ -198,52 +250,66 @@ def test_readjson_chunks_multiple_empty_lines(chunksize): {"A":3,"B":6} """ orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - test = read_json(j, lines=True, chunksize=chunksize) + test = read_json(j, lines=True, chunksize=chunksize, engine=engine) if chunksize is not None: with test: test = pd.concat(test) tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}") -def test_readjson_unicode(monkeypatch): +# pyarrow takes file path as an input +# and only supports line delimited json +# and doesn't have a chunksize argument +@xfail_json_engine_pyarrow +def test_readjson_unicode(monkeypatch, engine): with tm.ensure_clean("test.json") as path: monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949") with open(path, "w", encoding="utf-8") as f: f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}') - result = read_json(path) + result = read_json(path, engine=engine) expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]}) tm.assert_frame_equal(result, expected) +# pyarrow takes file path as an input +# and only supports line delimited json +# and doesn't have a chunksize argument +@xfail_json_engine_pyarrow @pytest.mark.parametrize("nrows", [1, 2]) -def test_readjson_nrows(nrows): +def test_readjson_nrows(nrows, engine): # GH 33916 # Test reading line-format JSON to Series with nrows param jsonl = """{"a": 1, "b": 2} {"a": 3, "b": 4} {"a": 5, "b": 6} {"a": 7, "b": 8}""" - result = read_json(jsonl, lines=True, nrows=nrows) + result = read_json(jsonl, lines=True, nrows=nrows, engine=engine) expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(result, expected) +# pyarrow takes file path as an input +# and only supports line 
delimited json +# and doesn't have a chunksize argument +@xfail_json_engine_pyarrow @pytest.mark.parametrize("nrows,chunksize", [(2, 2), (4, 2)]) -def test_readjson_nrows_chunks(nrows, chunksize): +def test_readjson_nrows_chunks(nrows, chunksize, engine): # GH 33916 # Test reading line-format JSON to Series with nrows and chunksize param jsonl = """{"a": 1, "b": 2} {"a": 3, "b": 4} {"a": 5, "b": 6} {"a": 7, "b": 8}""" - with read_json(jsonl, lines=True, nrows=nrows, chunksize=chunksize) as reader: + with read_json( + jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine + ) as reader: chunked = pd.concat(reader) expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(chunked, expected) -def test_readjson_nrows_requires_lines(): +def test_readjson_nrows_requires_lines(engine): # GH 33916 # Test ValuError raised if nrows is set without setting lines in read_json jsonl = """{"a": 1, "b": 2} @@ -252,10 +318,14 @@ def test_readjson_nrows_requires_lines(): {"a": 7, "b": 8}""" msg = "nrows can only be passed if lines=True" with pytest.raises(ValueError, match=msg): - read_json(jsonl, lines=False, nrows=2) + read_json(jsonl, lines=False, nrows=2, engine=engine) -def test_readjson_lines_chunks_fileurl(datapath): +# pyarrow takes file path as an input +# and only supports line delimited json +# and doesn't have a chunksize argument +@xfail_json_engine_pyarrow +def test_readjson_lines_chunks_fileurl(datapath, engine): # GH 27135 # Test reading line-format JSON from file url df_list_expected = [ @@ -265,7 +335,7 @@ def test_readjson_lines_chunks_fileurl(datapath): ] os_path = datapath("io", "json", "data", "line_delimited.json") file_url = Path(os_path).as_uri() - with read_json(file_url, lines=True, chunksize=1) as url_reader: + with read_json(file_url, lines=True, chunksize=1, engine=engine) as url_reader: for index, chuck in enumerate(url_reader): tm.assert_frame_equal(chuck, df_list_expected[index]) From c248b84d078bff2335f1ea9502dfea98e60c92f6 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Thu, 19 Jan 2023 22:51:31 -0500 Subject: [PATCH 02/16] moved argument to the end of signature, fixed elifs --- pandas/io/json/_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index fe87cb5aea2c2..3008ee19d4c84 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -394,13 +394,13 @@ def read_json( date_unit: str | None = ..., encoding: str | None = ..., encoding_errors: str | None = ..., - engine: JSONEngine = ..., lines: bool = ..., chunksize: int, compression: CompressionOptions = ..., nrows: int | None = ..., storage_options: StorageOptions = ..., use_nullable_dtypes: bool = ..., + engine: JSONEngine = ..., ) -> JsonReader[Literal["frame"]]: ... @@ -424,8 +424,8 @@ def read_json( compression: CompressionOptions = ..., nrows: int | None = ..., storage_options: StorageOptions = ..., - engine: JSONEngine = ..., use_nullable_dtypes: bool = ..., + engine: JSONEngine = ..., ) -> JsonReader[Literal["series"]]: ... 
@@ -851,7 +851,7 @@ def __init__( if self.engine == "pyarrow": self._engine = self._make_engine(filepath_or_buffer) - if self.engine == "ujson": + elif self.engine == "ujson": data = self._get_data_from_filepath(filepath_or_buffer) self.data = self._preprocess_data(data) From 91b81cfe5fe6ae80c72ab3876fd29867ec23a6e1 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Thu, 19 Jan 2023 22:53:21 -0500 Subject: [PATCH 03/16] Adding finally --- pandas/io/json/_json.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 3008ee19d4c84..d8201a60f8da0 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -879,7 +879,8 @@ def _make_engine( except Exception: if self.handles is not None: self.handles.close() - raise + finally: + raise Exception def _preprocess_data(self, data): """ From 902066364c1c9b195d2a0688b0cf9e414788adb1 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Tue, 24 Jan 2023 23:28:54 -0500 Subject: [PATCH 04/16] Updated the _make_engine try-finally block --- pandas/io/json/_json.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index d8201a60f8da0..e70a6f1d18e98 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -876,11 +876,9 @@ def _make_engine( try: return ArrowJsonParserWrapper(filepath_or_buffer) - except Exception: + finally: if self.handles is not None: self.handles.close() - finally: - raise Exception def _preprocess_data(self, data): """ From 0388c4e7c55fa8f0624938cfcfd3fd2efeee2e17 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Tue, 13 Dec 2022 02:13:57 -0500 Subject: [PATCH 05/16] Fixing merge conflicts --- pandas/io/json/_json.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index e70a6f1d18e98..ad3d8b7b07300 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -21,6 +21,9 @@ import numpy as np +from pandas._config import using_nullable_dtypes + +from pandas._libs import lib from pandas._libs.json import ( dumps, loads, @@ -502,7 +505,7 @@ def read_json( compression: CompressionOptions = "infer", nrows: int | None = None, storage_options: StorageOptions = None, - use_nullable_dtypes: bool = False, + use_nullable_dtypes: bool | lib.NoDefault = lib.no_default, engine: JSONEngine = "ujson", ) -> DataFrame | Series | JsonReader: """ @@ -742,6 +745,12 @@ def read_json( if orient == "table" and convert_axes: raise ValueError("cannot pass both convert_axes and orient='table'") + use_nullable_dtypes = ( + use_nullable_dtypes + if use_nullable_dtypes is not lib.no_default + else using_nullable_dtypes() + ) + if dtype is None and orient != "table": # error: Incompatible types in assignment (expression has type "bool", variable # has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float], From c660395fe5de1efbc5cfd57835ad4b1e1cf94166 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Tue, 13 Dec 2022 02:13:57 -0500 Subject: [PATCH 06/16] Refactored pyarrow engine code --- pandas/io/json/_json.py | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 3653597efc742..aab2b7a8d8128 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -858,37 +858,12 @@ def __init__( f"The engine type {self.engine} is currently not supported." 
) + data = self._get_data_from_filepath(filepath_or_buffer) if self.engine == "pyarrow": - self._engine = self._make_engine(filepath_or_buffer) + self.data = ArrowJsonParserWrapper(filepath_or_buffer) elif self.engine == "ujson": - data = self._get_data_from_filepath(filepath_or_buffer) self.data = self._preprocess_data(data) - def _make_engine( - self, - filepath_or_buffer: FilePath | ReadBuffer[str] | ReadBuffer[bytes], - ) -> ArrowJsonParserWrapper: - - if not isinstance(filepath_or_buffer, list): - is_text = False - mode = "rb" - self.handles = get_handle( - self._get_data_from_filepath(filepath_or_buffer), - mode=mode, - encoding=self.encoding, - is_text=is_text, - compression=self.compression, - storage_options=self.storage_options, - errors=self.encoding_errors, - ) - filepath_or_buffer = self.handles.handle - - try: - return ArrowJsonParserWrapper(filepath_or_buffer) - finally: - if self.handles is not None: - self.handles.close() - def _preprocess_data(self, data): """ At this point, the data either has a `read` attribute (e.g. a file @@ -973,7 +948,7 @@ def read(self) -> DataFrame | Series: obj: DataFrame | Series with self: if self.engine == "pyarrow": - obj = self._engine.read() + obj = self.data.read() if self.engine == "ujson": if self.lines: if self.chunksize: From eb709e7221f3d353c50dfb55e3f89f91e69838f1 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Tue, 13 Dec 2022 02:13:57 -0500 Subject: [PATCH 07/16] Refactored pyarrow implementation to inline --- pandas/io/json/_json.py | 18 ++++++++--- pandas/io/json/arrow_json_parser_wrapper.py | 35 --------------------- pandas/tests/io/json/test_readlines.py | 3 +- 3 files changed, 16 insertions(+), 40 deletions(-) delete mode 100644 pandas/io/json/arrow_json_parser_wrapper.py diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index aab2b7a8d8128..86f145c59e968 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -40,6 +40,7 @@ StorageOptions, WriteBuffer, ) +from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError from pandas.util._decorators import doc @@ -76,7 +77,6 @@ build_table_schema, parse_table_schema, ) -from pandas.io.json.arrow_json_parser_wrapper import ArrowJsonParserWrapper from pandas.io.parsers.readers import validate_integer if TYPE_CHECKING: @@ -661,6 +661,10 @@ def read_json( .. versionadded:: 2.0 + engine : {{'ujson', 'pyarrow'}}, default "ujson" + Parser engine to use. + + Returns ------- Series or DataFrame @@ -843,6 +847,10 @@ def __init__( self.chunksize = validate_integer("chunksize", self.chunksize, 1) if not self.lines: raise ValueError("chunksize can only be passed if lines=True") + if self.engine == "pyarrow": + raise ValueError( + "currently pyarrow engine doesn't support chunksize parameter" + ) if self.nrows is not None: self.nrows = validate_integer("nrows", self.nrows, 0) if not self.lines: @@ -858,10 +866,10 @@ def __init__( f"The engine type {self.engine} is currently not supported." 
) - data = self._get_data_from_filepath(filepath_or_buffer) if self.engine == "pyarrow": - self.data = ArrowJsonParserWrapper(filepath_or_buffer) + self.data = filepath_or_buffer elif self.engine == "ujson": + data = self._get_data_from_filepath(filepath_or_buffer) self.data = self._preprocess_data(data) def _preprocess_data(self, data): @@ -948,7 +956,9 @@ def read(self) -> DataFrame | Series: obj: DataFrame | Series with self: if self.engine == "pyarrow": - obj = self.data.read() + pyarrow_json = import_optional_dependency("pyarrow.json") + table = pyarrow_json.read_json(self.data) + obj = table.to_pandas() if self.engine == "ujson": if self.lines: if self.chunksize: diff --git a/pandas/io/json/arrow_json_parser_wrapper.py b/pandas/io/json/arrow_json_parser_wrapper.py deleted file mode 100644 index 2980ed2f2772e..0000000000000 --- a/pandas/io/json/arrow_json_parser_wrapper.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from pandas._typing import ReadBuffer -from pandas.compat._optional import import_optional_dependency - -if TYPE_CHECKING: - from pandas import DataFrame - - -class ArrowJsonParserWrapper: - """ - Wrapper for the pyarrow engine for read_json() - """ - - def __init__(self, src: ReadBuffer[bytes]) -> None: - self.src = src - - def read(self) -> DataFrame: - """ - Reads the contents of a JSON file into a DataFrame and - processes it according to the kwargs passed in the - constructor. - - Returns - ------- - DataFrame - The DataFrame created from the JSON file. - """ - pyarrow_json = import_optional_dependency("pyarrow.json") - table = pyarrow_json.read_json(self.src) - - frame = table.to_pandas() - return frame diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 9dab7c54d9a72..3a7dd3cde5f29 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -34,6 +34,7 @@ def test_read_jsonl(engine): def test_read_jsonl_engine_pyarrow(json_dir_path, engine): + print(os.path.join(json_dir_path, "line_delimited.json")) result = read_json( os.path.join(json_dir_path, "line_delimited.json"), lines=True, @@ -114,8 +115,8 @@ def test_to_jsonl_count_new_lines(): # pyarrow takes file path as an input # and only supports line delimited json # and doesn't have a chunksize argument -@pytest.mark.parametrize("chunksize", [1, 1.0]) @xfail_json_engine_pyarrow +@pytest.mark.parametrize("chunksize", [1, 1.0]) def test_readjson_chunks(lines_json_df, chunksize, engine): # Basic test that read_json(chunks=True) gives the same result as # read_json(chunks=False) From 7eccd8323831e2abf68941c329a2e523bbaa4a10 Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Tue, 31 Jan 2023 23:58:17 -0500 Subject: [PATCH 08/16] Small refactors --- pandas/io/json/_json.py | 7 ++----- pandas/tests/io/json/test_readlines.py | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f1b1b2dc28781..d8d66ad9ebcb4 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -615,9 +615,6 @@ def read_json( .. versionadded:: 1.3.0 - engine : {{'ujson', 'pyarrow'}}, default "ujson" - Parser engine to use. - lines : bool, default False Read the file as a json object per line. 
@@ -863,7 +860,7 @@ def __init__( "currently pyarrow engine only supports " "the line-delimited JSON format" ) - if self.engine not in ["pyarrow", "ujson"]: + if self.engine not in {"pyarrow", "ujson"}: raise ValueError( f"The engine type {self.engine} is currently not supported." ) @@ -961,7 +958,7 @@ def read(self) -> DataFrame | Series: pyarrow_json = import_optional_dependency("pyarrow.json") table = pyarrow_json.read_json(self.data) obj = table.to_pandas() - if self.engine == "ujson": + elif self.engine == "ujson": if self.lines: if self.chunksize: obj = concat(self) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 3a7dd3cde5f29..0701ebf9dee19 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -34,7 +34,6 @@ def test_read_jsonl_engine_pyarrow(json_dir_path, engine): def test_read_jsonl_engine_pyarrow(json_dir_path, engine): - print(os.path.join(json_dir_path, "line_delimited.json")) result = read_json( os.path.join(json_dir_path, "line_delimited.json"), lines=True, From b4409bb83234e5fb407b6998dc882311042b11ce Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Wed, 1 Feb 2023 12:04:45 -0500 Subject: [PATCH 09/16] Fixing double lines --- pandas/io/json/_json.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index d8d66ad9ebcb4..bf03510b45a37 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -663,7 +663,6 @@ def read_json( engine : {{'ujson', 'pyarrow'}}, default "ujson" Parser engine to use. - Returns ------- Series or DataFrame From 38bc7dbe83bff23e7528da214d197befa9cca64d Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Thu, 2 Feb 2023 09:49:57 -0500 Subject: [PATCH 10/16] - Added logic to skip tests if pyarrow is not installed. - Added a failure reason to each pyarrow xfail test. 
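A minimal sketch of the pattern this commit describes (illustrative only: test_example is hypothetical, and the real fixture and reason strings live in the conftest.py and test_readlines.py diffs below):

import pytest

@pytest.fixture(params=["ujson", "pyarrow"])
def engine(request):
    # Collection-time skip: a missing optional dependency becomes a skip,
    # not an import error, for every pyarrow-parametrized test.
    if request.param == "pyarrow":
        pytest.importorskip("pyarrow.json")
    return request.param

def test_example(request, engine):
    # Unsupported pyarrow cases stay visible in the report as xfail with an
    # explicit reason instead of being silently skipped.
    if engine == "pyarrow":
        reason = "pyarrow only supports a file path input and line-delimited JSON"
        request.node.add_marker(pytest.mark.xfail(reason=reason))
    assert engine in {"ujson", "pyarrow"}

importorskip handles the "pyarrow not installed" case once in the fixture, while the per-test xfail marker documents exactly which behavior the pyarrow engine cannot handle yet.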
--- pandas/tests/io/json/conftest.py | 4 +- pandas/tests/io/json/test_readlines.py | 131 ++++++++++++++++--------- 2 files changed, 89 insertions(+), 46 deletions(-) diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index b600064d08a46..66d9d199f429f 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -19,7 +19,9 @@ def json_dir_path(datapath): @pytest.fixture(params=["ujson", "pyarrow"]) def engine(request): - return request.param + if request.param == "pyarrow": + pytest.importorskip("pyarrow.json") + return request.param @pytest.fixture diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 0701ebf9dee19..9073afbd601cb 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -23,11 +23,13 @@ def lines_json_df(): return df.to_json(lines=True, orient="records") -# pyarrow takes file path as an input -# and only supports line delimited json @xfail_json_engine_pyarrow -def test_read_jsonl(engine): +def test_read_jsonl(request, engine): # GH9180 + if engine == "pyarrow": + reason = "Pyarrow only supports a file path as an input and line delimited json" + request.node.add_marker(pytest.mark.xfail(reason=reason)) + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True, engine=engine) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -43,11 +45,13 @@ def test_read_jsonl_engine_pyarrow(json_dir_path, engine): tm.assert_frame_equal(result, expected) -# pyarrow takes file path as an input -# and only supports line delimited json @xfail_json_engine_pyarrow -def test_read_datetime(engine): +def test_read_datetime(request, engine): # GH33787 + if engine == "pyarrow": + reason = "Pyarrow only supports a file path as an input and line delimited json" + request.node.add_marker(pytest.mark.xfail(reason=reason)) + df = DataFrame( [([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")], columns=["accounts", "date", "name"], @@ -61,12 +65,13 @@ def test_read_datetime(engine): tm.assert_frame_equal(result, expected) -# pyarrow takes file path as an input -# and only supports line delimited json @xfail_json_engine_pyarrow -def test_read_jsonl_unicode_chars(engine): +def test_read_jsonl_unicode_chars(request, engine): # GH15132: non-ascii unicode characters # \u201d == RIGHT DOUBLE QUOTATION MARK + if engine == "pyarrow": + reason = "Pyarrow only supports a file path as an input and line delimited json" + request.node.add_marker(pytest.mark.xfail(reason=reason)) # simulate file handle json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' @@ -111,16 +116,20 @@ def test_to_jsonl_count_new_lines(): assert actual_new_lines_count == expected_new_lines_count -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow @pytest.mark.parametrize("chunksize", [1, 1.0]) -def test_readjson_chunks(lines_json_df, chunksize, engine): +def test_readjson_chunks(request, lines_json_df, chunksize, engine): # Basic test that read_json(chunks=True) gives the same result as # read_json(chunks=False) # GH17048: memory usage when lines=True + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." 
+ ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + unchunked = read_json(StringIO(lines_json_df), lines=True) with read_json( StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine @@ -139,11 +148,15 @@ def test_readjson_chunksize_requires_lines(lines_json_df, engine): pass -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow -def test_readjson_chunks_series(engine): +def test_readjson_chunks_series(request, engine): + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." + ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + # Test reading line-format JSON to Series with chunksize param s = pd.Series({"A": 1, "B": 2}) @@ -159,11 +172,15 @@ def test_readjson_chunks_series(engine): tm.assert_series_equal(chunked, unchunked) -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow -def test_readjson_each_chunk(lines_json_df, engine): +def test_readjson_each_chunk(request, lines_json_df, engine): + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." + ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + # Other tests check that the final result of read_json(chunksize=True) # is correct. This checks the intermediate chunks. with read_json( @@ -174,11 +191,15 @@ def test_readjson_each_chunk(lines_json_df, engine): assert chunks[1].shape == (1, 2) -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow -def test_readjson_chunks_from_file(engine): +def test_readjson_chunks_from_file(request, engine): + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." + ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + with tm.ensure_clean("test.json") as path: df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) df.to_json(path, lines=True, orient="records") @@ -228,11 +249,15 @@ def test_readjson_invalid_chunksize(lines_json_df, chunksize, engine): @pytest.mark.parametrize("chunksize", [None, 1, 2]) -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow -def test_readjson_chunks_multiple_empty_lines(chunksize, engine): +def test_readjson_chunks_multiple_empty_lines(request, chunksize, engine): + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." 
+ ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + j = """ {"A":1,"B":4} @@ -257,11 +282,15 @@ def test_readjson_chunks_multiple_empty_lines(chunksize, engine): tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}") -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow -def test_readjson_unicode(monkeypatch, engine): +def test_readjson_unicode(request, monkeypatch, engine): + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." + ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + with tm.ensure_clean("test.json") as path: monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949") with open(path, "w", encoding="utf-8") as f: @@ -272,14 +301,18 @@ def test_readjson_unicode(monkeypatch, engine): tm.assert_frame_equal(result, expected) -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow @pytest.mark.parametrize("nrows", [1, 2]) -def test_readjson_nrows(nrows, engine): +def test_readjson_nrows(request, nrows, engine): # GH 33916 # Test reading line-format JSON to Series with nrows param + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." + ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + jsonl = """{"a": 1, "b": 2} {"a": 3, "b": 4} {"a": 5, "b": 6} @@ -289,14 +322,18 @@ def test_readjson_nrows(nrows, engine): tm.assert_frame_equal(result, expected) -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow @pytest.mark.parametrize("nrows,chunksize", [(2, 2), (4, 2)]) -def test_readjson_nrows_chunks(nrows, chunksize, engine): +def test_readjson_nrows_chunks(request, nrows, chunksize, engine): # GH 33916 # Test reading line-format JSON to Series with nrows and chunksize param + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." + ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + jsonl = """{"a": 1, "b": 2} {"a": 3, "b": 4} {"a": 5, "b": 6} @@ -321,13 +358,17 @@ def test_readjson_nrows_requires_lines(engine): read_json(jsonl, lines=False, nrows=2, engine=engine) -# pyarrow takes file path as an input -# and only supports line delimited json -# and doesn't have a chunksize argument @xfail_json_engine_pyarrow -def test_readjson_lines_chunks_fileurl(datapath, engine): +def test_readjson_lines_chunks_fileurl(request, datapath, engine): # GH 27135 # Test reading line-format JSON from file url + if engine == "pyarrow": + reason = ( + "Pyarrow only supports a file path as an input and line delimited json" + "and doesn't support chunksize parameter." 
+ ) + request.node.add_marker(pytest.mark.xfail(reason=reason)) + df_list_expected = [ DataFrame([[1, 2]], columns=["a", "b"], index=[0]), DataFrame([[3, 4]], columns=["a", "b"], index=[1]), From bed15df6618d35feab00ed9f0066becff07f09ac Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Thu, 2 Feb 2023 10:37:05 -0500 Subject: [PATCH 11/16] Added else statement to conftest engine --- pandas/tests/io/json/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index 66d9d199f429f..5488894e317e2 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -22,6 +22,8 @@ def engine(request): if request.param == "pyarrow": pytest.importorskip("pyarrow.json") return request.param + else: + return request.param @pytest.fixture From cdfd747c2638c246dbaae0ccaeeaaf3202eed57b Mon Sep 17 00:00:00 2001 From: Arda Kosar Date: Thu, 2 Feb 2023 13:23:52 -0500 Subject: [PATCH 12/16] - removed xfail decorators - removed xfail fixture from conftest.py --- pandas/tests/io/json/conftest.py | 11 ----------- pandas/tests/io/json/test_readlines.py | 14 -------------- 2 files changed, 25 deletions(-) diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index 5488894e317e2..8e2b167e576df 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -24,14 +24,3 @@ def engine(request): return request.param else: return request.param - - -@pytest.fixture -def json_engine_pyarrow_xfail(request): - """ - Fixture that xfails a test if the engine is pyarrow. - """ - engine = request.getfixturevalue("engine") - if engine == "pyarrow": - mark = pytest.mark.xfail(reason="pyarrow doesn't support this.") - request.node.add_marker(mark) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 9073afbd601cb..beecb546ae321 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -14,8 +14,6 @@ from pandas.io.json._json import JsonReader -xfail_json_engine_pyarrow = pytest.mark.usefixtures("json_engine_pyarrow_xfail") - @pytest.fixture def lines_json_df(): @@ -23,7 +21,6 @@ def lines_json_df(): return df.to_json(lines=True, orient="records") -@xfail_json_engine_pyarrow def test_read_jsonl(request, engine): # GH9180 if engine == "pyarrow": @@ -45,7 +42,6 @@ def test_read_jsonl_engine_pyarrow(json_dir_path, engine): tm.assert_frame_equal(result, expected) -@xfail_json_engine_pyarrow def test_read_datetime(request, engine): # GH33787 if engine == "pyarrow": @@ -65,7 +61,6 @@ def test_read_datetime(request, engine): tm.assert_frame_equal(result, expected) -@xfail_json_engine_pyarrow def test_read_jsonl_unicode_chars(request, engine): # GH15132: non-ascii unicode characters # \u201d == RIGHT DOUBLE QUOTATION MARK @@ -116,7 +111,6 @@ def test_to_jsonl_count_new_lines(): assert actual_new_lines_count == expected_new_lines_count -@xfail_json_engine_pyarrow @pytest.mark.parametrize("chunksize", [1, 1.0]) def test_readjson_chunks(request, lines_json_df, chunksize, engine): # Basic test that read_json(chunks=True) gives the same result as @@ -148,7 +142,6 @@ def test_readjson_chunksize_requires_lines(lines_json_df, engine): pass -@xfail_json_engine_pyarrow def test_readjson_chunks_series(request, engine): if engine == "pyarrow": reason = ( @@ -172,7 +165,6 @@ def test_readjson_chunks_series(request, engine): tm.assert_series_equal(chunked, unchunked) -@xfail_json_engine_pyarrow def 
test_readjson_each_chunk(request, lines_json_df, engine): if engine == "pyarrow": reason = ( @@ -191,7 +183,6 @@ def test_readjson_each_chunk(request, lines_json_df, engine): assert chunks[1].shape == (1, 2) -@xfail_json_engine_pyarrow def test_readjson_chunks_from_file(request, engine): if engine == "pyarrow": reason = ( @@ -249,7 +240,6 @@ def test_readjson_invalid_chunksize(lines_json_df, chunksize, engine): @pytest.mark.parametrize("chunksize", [None, 1, 2]) -@xfail_json_engine_pyarrow def test_readjson_chunks_multiple_empty_lines(request, chunksize, engine): if engine == "pyarrow": reason = ( @@ -282,7 +272,6 @@ def test_readjson_chunks_multiple_empty_lines(request, chunksize, engine): tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}") -@xfail_json_engine_pyarrow def test_readjson_unicode(request, monkeypatch, engine): if engine == "pyarrow": reason = ( @@ -301,7 +290,6 @@ def test_readjson_unicode(request, monkeypatch, engine): tm.assert_frame_equal(result, expected) -@xfail_json_engine_pyarrow @pytest.mark.parametrize("nrows", [1, 2]) def test_readjson_nrows(request, nrows, engine): # GH 33916 @@ -322,7 +310,6 @@ def test_readjson_nrows(request, nrows, engine): tm.assert_frame_equal(result, expected) -@xfail_json_engine_pyarrow @pytest.mark.parametrize("nrows,chunksize", [(2, 2), (4, 2)]) def test_readjson_nrows_chunks(request, nrows, chunksize, engine): # GH 33916 @@ -358,7 +345,6 @@ def test_readjson_nrows_requires_lines(engine): read_json(jsonl, lines=False, nrows=2, engine=engine) -@xfail_json_engine_pyarrow def test_readjson_lines_chunks_fileurl(request, datapath, engine): # GH 27135 # Test reading line-format JSON from file url From ab7af44bdbfdfdedc5c0a205ca01c145bdb1348b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 9 Feb 2023 12:47:30 -0800 Subject: [PATCH 13/16] add whatsnew, address comments --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/io/json/_json.py | 34 +++++++++++---- pandas/tests/io/json/conftest.py | 12 +----- pandas/tests/io/json/test_pandas.py | 7 ++++ pandas/tests/io/json/test_readlines.py | 58 ++++++++------------------ 5 files changed, 52 insertions(+), 60 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9fd9faf057a8a..296c24469df4e 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -298,6 +298,7 @@ Other enhancements - Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`) - Added :meth:`Series.dt.unit` and :meth:`Series.dt.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`51223`) - Added new argument ``dtype`` to :func:`read_sql` to be consistent with :func:`read_sql_query` (:issue:`50797`) +- Added new argument ``engine`` to :func:`read_json` to support parsing JSON with pyarrow by specifying ``engine="pyarrow"`` (:issue:`48893`) - Added support for SQLAlchemy 2.0 (:issue:`40686`) - diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 826186612d5ab..093d90f90c54a 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -21,7 +21,10 @@ import numpy as np -from pandas._config import using_nullable_dtypes +from pandas._config import ( + get_option, + using_nullable_dtypes, +) from pandas._libs import lib from pandas._libs.json import ( @@ -841,6 +844,10 @@ def __init__( self.handles: IOHandles[str] | 
None = None self.use_nullable_dtypes = use_nullable_dtypes + if self.engine not in {"pyarrow", "ujson"}: + raise ValueError( + f"The engine type {self.engine} is currently not supported." + ) if self.chunksize is not None: self.chunksize = validate_integer("chunksize", self.chunksize, 1) if not self.lines: @@ -859,12 +866,6 @@ def __init__( "currently pyarrow engine only supports " "the line-delimited JSON format" ) - if self.engine not in {"pyarrow", "ujson"}: - raise ValueError( - f"The engine type {self.engine} is currently not supported." - ) - - if self.engine == "pyarrow": self.data = filepath_or_buffer elif self.engine == "ujson": data = self._get_data_from_filepath(filepath_or_buffer) @@ -955,8 +956,23 @@ def read(self) -> DataFrame | Series: with self: if self.engine == "pyarrow": pyarrow_json = import_optional_dependency("pyarrow.json") - table = pyarrow_json.read_json(self.data) - obj = table.to_pandas() + pa_table = pyarrow_json.read_json(self.data) + if ( + self.use_nullable_dtypes + and get_option("mode.dtype_backend") == "pyarrow" + ): + from pandas.arrays import ArrowExtensionArray + + obj = DataFrame( + { + col_name: ArrowExtensionArray(pa_col) + for col_name, pa_col in zip( + pa_table.column_names, pa_table.itercolumns() + ) + } + ) + return obj + obj = pa_table.to_pandas() elif self.engine == "ujson": if self.lines: if self.chunksize: diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py index 8e2b167e576df..f3736252e850a 100644 --- a/pandas/tests/io/json/conftest.py +++ b/pandas/tests/io/json/conftest.py @@ -9,18 +9,8 @@ def orient(request): return request.param -@pytest.fixture -def json_dir_path(datapath): - """ - The directory path to the data files needed for parser tests. - """ - return datapath("io", "json", "data") - - @pytest.fixture(params=["ujson", "pyarrow"]) def engine(request): if request.param == "pyarrow": pytest.importorskip("pyarrow.json") - return request.param - else: - return request.param + return request.param diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index f59e1e8cbe43d..9728afd09e8b5 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1956,3 +1956,10 @@ def test_read_json_nullable_series(self, string_storage, dtype_backend, orient): expected = Series(ArrowExtensionArray(pa.array(expected, from_pandas=True))) tm.assert_series_equal(result, expected) + + +def test_invalid_engine(): + ser = Series(range(1)) + out = ser.to_json() + with pytest.raises(ValueError, match="The engine type foo"): + read_json(out, engine="foo") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index beecb546ae321..e4d68688ebf28 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -1,5 +1,4 @@ from io import StringIO -import os from pathlib import Path from typing import Iterator @@ -21,20 +20,16 @@ def lines_json_df(): return df.to_json(lines=True, orient="records") -def test_read_jsonl(request, engine): +def test_read_jsonl(): # GH9180 - if engine == "pyarrow": - reason = "Pyarrow only supports a file path as an input and line delimited json" - request.node.add_marker(pytest.mark.xfail(reason=reason)) - - result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True, engine=engine) + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) -def 
test_read_jsonl_engine_pyarrow(json_dir_path, engine): +def test_read_jsonl_engine_pyarrow(datapath, engine): result = read_json( - os.path.join(json_dir_path, "line_delimited.json"), + datapath("io", "json", "data", "line_delimited.json"), lines=True, engine=engine, ) @@ -46,7 +41,7 @@ def test_read_datetime(request, engine): # GH33787 if engine == "pyarrow": reason = "Pyarrow only supports a file path as an input and line delimited json" - request.node.add_marker(pytest.mark.xfail(reason=reason)) + request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) df = DataFrame( [([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")], @@ -61,23 +56,20 @@ def test_read_datetime(request, engine): tm.assert_frame_equal(result, expected) -def test_read_jsonl_unicode_chars(request, engine): +def test_read_jsonl_unicode_chars(): # GH15132: non-ascii unicode characters # \u201d == RIGHT DOUBLE QUOTATION MARK - if engine == "pyarrow": - reason = "Pyarrow only supports a file path as an input and line delimited json" - request.node.add_marker(pytest.mark.xfail(reason=reason)) # simulate file handle json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' json = StringIO(json) - result = read_json(json, lines=True, engine=engine) + result = read_json(json, lines=True) expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) # simulate string json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - result = read_json(json, lines=True, engine=engine) + result = read_json(json, lines=True) expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) tm.assert_frame_equal(result, expected) @@ -122,7 +114,7 @@ def test_readjson_chunks(request, lines_json_df, chunksize, engine): "Pyarrow only supports a file path as an input and line delimited json" "and doesn't support chunksize parameter." ) - request.node.add_marker(pytest.mark.xfail(reason=reason)) + request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) unchunked = read_json(StringIO(lines_json_df), lines=True) with read_json( @@ -171,7 +163,7 @@ def test_readjson_each_chunk(request, lines_json_df, engine): "Pyarrow only supports a file path as an input and line delimited json" "and doesn't support chunksize parameter." ) - request.node.add_marker(pytest.mark.xfail(reason=reason)) + request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) # Other tests check that the final result of read_json(chunksize=True) # is correct. This checks the intermediate chunks. @@ -189,7 +181,7 @@ def test_readjson_chunks_from_file(request, engine): "Pyarrow only supports a file path as an input and line delimited json" "and doesn't support chunksize parameter." ) - request.node.add_marker(pytest.mark.xfail(reason=reason)) + request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) with tm.ensure_clean("test.json") as path: df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) @@ -240,14 +232,7 @@ def test_readjson_invalid_chunksize(lines_json_df, chunksize, engine): @pytest.mark.parametrize("chunksize", [None, 1, 2]) -def test_readjson_chunks_multiple_empty_lines(request, chunksize, engine): - if engine == "pyarrow": - reason = ( - "Pyarrow only supports a file path as an input and line delimited json" - "and doesn't support chunksize parameter." 
- ) - request.node.add_marker(pytest.mark.xfail(reason=reason)) - +def test_readjson_chunks_multiple_empty_lines(chunksize): j = """ {"A":1,"B":4} @@ -265,7 +250,7 @@ def test_readjson_chunks_multiple_empty_lines(request, chunksize, engine): {"A":3,"B":6} """ orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - test = read_json(j, lines=True, chunksize=chunksize, engine=engine) + test = read_json(j, lines=True, chunksize=chunksize) if chunksize is not None: with test: test = pd.concat(test) @@ -278,7 +263,7 @@ def test_readjson_unicode(request, monkeypatch, engine): "Pyarrow only supports a file path as an input and line delimited json" "and doesn't support chunksize parameter." ) - request.node.add_marker(pytest.mark.xfail(reason=reason)) + request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) with tm.ensure_clean("test.json") as path: monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949") @@ -291,21 +276,14 @@ def test_readjson_unicode(request, monkeypatch, engine): @pytest.mark.parametrize("nrows", [1, 2]) -def test_readjson_nrows(request, nrows, engine): +def test_readjson_nrows(nrows, engine): # GH 33916 # Test reading line-format JSON to Series with nrows param - if engine == "pyarrow": - reason = ( - "Pyarrow only supports a file path as an input and line delimited json" - "and doesn't support chunksize parameter." - ) - request.node.add_marker(pytest.mark.xfail(reason=reason)) - jsonl = """{"a": 1, "b": 2} {"a": 3, "b": 4} {"a": 5, "b": 6} {"a": 7, "b": 8}""" - result = read_json(jsonl, lines=True, nrows=nrows, engine=engine) + result = read_json(jsonl, lines=True, nrows=nrows) expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(result, expected) @@ -319,7 +297,7 @@ def test_readjson_nrows_chunks(request, nrows, chunksize, engine): "Pyarrow only supports a file path as an input and line delimited json" "and doesn't support chunksize parameter." ) - request.node.add_marker(pytest.mark.xfail(reason=reason)) + request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) jsonl = """{"a": 1, "b": 2} {"a": 3, "b": 4} @@ -353,7 +331,7 @@ def test_readjson_lines_chunks_fileurl(request, datapath, engine): "Pyarrow only supports a file path as an input and line delimited json" "and doesn't support chunksize parameter." ) - request.node.add_marker(pytest.mark.xfail(reason=reason)) + request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) df_list_expected = [ DataFrame([[1, 2]], columns=["a", "b"], index=[0]), From 8c9655346b2b6e0eab95310c305c93e2212a27e4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 9 Feb 2023 14:53:18 -0800 Subject: [PATCH 14/16] address review --- doc/source/user_guide/io.rst | 10 ++++++ pandas/io/json/_json.py | 49 ++++++++++++++------------ pandas/tests/io/json/test_pandas.py | 1 + pandas/tests/io/json/test_readlines.py | 8 +++++ 4 files changed, 46 insertions(+), 22 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 1c3cdd9f4cffd..d654b1aed6836 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2250,6 +2250,16 @@ For line-delimited json files, pandas can also return an iterator which reads in for chunk in reader: print(chunk) +Line-limited json can also be read using the pyarrow reader by specifying ``engine="pyarrow"``. + +.. 
ipython:: python + + from io import BytesIO + df = pd.read_json(BytesIO(jsonl.encode()), lines=True, engine="pyarrow") + df + +.. versionadded:: 2.0.0 + .. _io.table_schema: Table schema diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 093d90f90c54a..1494b11125319 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -663,8 +663,11 @@ def read_json( .. versionadded:: 2.0 - engine : {{'ujson', 'pyarrow'}}, default "ujson" - Parser engine to use. + engine : {{"ujson", "pyarrow"}}, default "ujson" + Parser engine to use. The ``"pyarrow"`` engine is only available when + ``lines=True``. + + .. versionadded:: 2.0 Returns ------- @@ -957,22 +960,24 @@ def read(self) -> DataFrame | Series: if self.engine == "pyarrow": pyarrow_json = import_optional_dependency("pyarrow.json") pa_table = pyarrow_json.read_json(self.data) - if ( - self.use_nullable_dtypes - and get_option("mode.dtype_backend") == "pyarrow" - ): - from pandas.arrays import ArrowExtensionArray - - obj = DataFrame( - { - col_name: ArrowExtensionArray(pa_col) - for col_name, pa_col in zip( - pa_table.column_names, pa_table.itercolumns() - ) - } - ) - return obj - obj = pa_table.to_pandas() + if self.use_nullable_dtypes: + if get_option("mode.dtype_backend") == "pyarrow": + from pandas.arrays import ArrowExtensionArray + + return DataFrame( + { + col_name: ArrowExtensionArray(pa_col) + for col_name, pa_col in zip( + pa_table.column_names, pa_table.itercolumns() + ) + } + ) + elif get_option("mode.dtype_backend") == "pandas": + from pandas.io._util import _arrow_dtype_mapping + + mapping = _arrow_dtype_mapping() + return pa_table.to_pandas(types_mapper=mapping.get) + return pa_table.to_pandas() elif self.engine == "ujson": if self.lines: if self.chunksize: @@ -987,10 +992,10 @@ def read(self) -> DataFrame | Series: obj = self._get_object_parser(self._combine_lines(data_lines)) else: obj = self._get_object_parser(self.data) - if self.use_nullable_dtypes: - return obj.convert_dtypes(infer_objects=False) - else: - return obj + if self.use_nullable_dtypes: + return obj.convert_dtypes(infer_objects=False) + else: + return obj def _get_object_parser(self, json) -> DataFrame | Series: """ diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9728afd09e8b5..3e361ad6852f9 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1959,6 +1959,7 @@ def test_read_json_nullable_series(self, string_storage, dtype_backend, orient): def test_invalid_engine(): + # GH 48893 ser = Series(range(1)) out = ser.to_json() with pytest.raises(ValueError, match="The engine type foo"): diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index e4d68688ebf28..9b36423be73dd 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -40,6 +40,7 @@ def test_read_jsonl_engine_pyarrow(datapath, engine): def test_read_datetime(request, engine): # GH33787 if engine == "pyarrow": + # GH 48893 reason = "Pyarrow only supports a file path as an input and line delimited json" request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) @@ -110,6 +111,7 @@ def test_readjson_chunks(request, lines_json_df, chunksize, engine): # GH17048: memory usage when lines=True if engine == "pyarrow": + # GH 48893 reason = ( "Pyarrow only supports a file path as an input and line delimited json" "and doesn't support chunksize parameter." 
@@ -136,6 +138,7 @@ def test_readjson_chunksize_requires_lines(lines_json_df, engine):

 def test_readjson_chunks_series(request, engine):
     if engine == "pyarrow":
+        # GH 48893
         reason = (
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
@@ -159,6 +162,7 @@ def test_readjson_chunks_series(request, engine):

 def test_readjson_each_chunk(request, lines_json_df, engine):
     if engine == "pyarrow":
+        # GH 48893
         reason = (
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
@@ -177,6 +181,7 @@ def test_readjson_each_chunk(request, lines_json_df, engine):

 def test_readjson_chunks_from_file(request, engine):
     if engine == "pyarrow":
+        # GH 48893
         reason = (
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
@@ -259,6 +264,7 @@ def test_readjson_chunks_multiple_empty_lines(chunksize):

 def test_readjson_unicode(request, monkeypatch, engine):
     if engine == "pyarrow":
+        # GH 48893
         reason = (
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
@@ -293,6 +299,7 @@ def test_readjson_nrows_chunks(request, nrows, chunksize, engine):
     # GH 33916
     # Test reading line-format JSON to Series with nrows and chunksize param
     if engine == "pyarrow":
+        # GH 48893
         reason = (
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."
@@ -327,6 +334,7 @@ def test_readjson_lines_chunks_fileurl(request, datapath, engine):
     # GH 27135
     # Test reading line-format JSON from file url
     if engine == "pyarrow":
+        # GH 48893
         reason = (
             "Pyarrow only supports a file path as an input and line delimited json"
             "and doesn't support chunksize parameter."

From 9cbf598a58aa64b9a36135313c2a14efe4be1922 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 9 Feb 2023 15:25:58 -0800
Subject: [PATCH 15/16] Add note about param

---
 doc/source/user_guide/io.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index d654b1aed6836..d0cd5c300248b 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -2069,6 +2069,8 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
 * ``lines`` : reads file as one json object per line.
 * ``encoding`` : The encoding to use to decode py3 bytes.
 * ``chunksize`` : when used in combination with ``lines=True``, return a JsonReader which reads in ``chunksize`` lines per iteration.
+* ``engine``: Either ``"ujson"``, the built-in JSON parser, or ``"pyarrow"``, which dispatches to pyarrow's ``pyarrow.json.read_json``.
+  The ``"pyarrow"`` engine is only available when ``lines=True``.

 The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON
 is not parseable.
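
As a usage illustration for the note added in PATCH 15 above, a minimal sketch (assuming pyarrow is installed; ``data.json`` is an illustrative path, not part of the patch):

    import pandas as pd

    # Write a small line-delimited file, then read it back with both engines.
    df = pd.DataFrame({"a": [1, 3], "b": [2, 4]})
    df.to_json("data.json", orient="records", lines=True)

    result_ujson = pd.read_json("data.json", lines=True, engine="ujson")
    result_pyarrow = pd.read_json("data.json", lines=True, engine="pyarrow")

    # For plain line-delimited input the two engines should agree.
    pd.testing.assert_frame_equal(result_ujson, result_pyarrow)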
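
The ``read()`` refactor in PATCH 14 also routes pyarrow output through the nullable-dtype machinery. A sketch of that path, reusing ``data.json`` from the previous sketch and the dev-era ``mode.dtype_backend`` option that the hunk reads via ``get_option`` (option names and availability follow this series, not necessarily later releases):

    import pandas as pd

    # With use_nullable_dtypes=True and the "pyarrow" backend, each column is
    # built from an ArrowExtensionArray rather than converted to NumPy.
    with pd.option_context("mode.dtype_backend", "pyarrow"):
        df = pd.read_json(
            "data.json", lines=True, engine="pyarrow", use_nullable_dtypes=True
        )
    print(df.dtypes)  # expected to show pyarrow-backed dtypes, e.g. int64[pyarrow]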
From c59310bd7e1e60a7a279c27e35f00d5152475922 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 9 Feb 2023 18:13:02 -0800 Subject: [PATCH 16/16] Add test with lines=false --- pandas/tests/io/json/test_pandas.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 3e361ad6852f9..db839353934b0 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1964,3 +1964,11 @@ def test_invalid_engine(): out = ser.to_json() with pytest.raises(ValueError, match="The engine type foo"): read_json(out, engine="foo") + + +def test_pyarrow_engine_lines_false(): + # GH 48893 + ser = Series(range(1)) + out = ser.to_json() + with pytest.raises(ValueError, match="currently pyarrow engine only supports"): + read_json(out, engine="pyarrow", lines=False)
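
A user-side sketch of the error path this final test pins down (the message fragment is the one asserted in the ``match`` above):

    import pandas as pd

    payload = pd.Series(range(1)).to_json()  # regular, non-line-delimited JSON
    try:
        pd.read_json(payload, engine="pyarrow", lines=False)
    except ValueError as err:
        # starts with "currently pyarrow engine only supports", per the test
        print(err)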