Adding tests for append functionality, along with updated whatsnew, user_guide, and generic docstring.

SFuller4 · SFuller4 · commit 042853436f08 · 2022-09-13T22:02:29.000-05:00
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -1910,6 +1910,7 @@ with optional parameters:
 * ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'.
 * ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object.
 * ``lines`` : If ``records`` orient, then will write each record per line as json.
+* ``mode`` : string, writer mode used when writing to a path. 'w' for write, 'a' for append; 'a' is only supported when ``lines`` is True and ``orient`` is 'records'. Default 'w'.
 
 Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters.
 
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -333,6 +333,7 @@ Other enhancements
 - :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
 - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`)
 - The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`)
+- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs ``'w'`` and ``'a'``. The default is ``'w'``; ``'a'`` can be used when ``lines=True`` and ``orient='records'`` to append record-oriented JSON lines to an existing JSON file (:issue:`35849`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.notable_bug_fixes:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2400,7 +2400,7 @@ def to_json(
         index: bool_t = True,
         indent: int | None = None,
         storage_options: StorageOptions = None,
-        mode: str = 'w',
+        mode: str = "w",
     ) -> str | None:
         """
         Convert the object to a JSON string.
@@ -2480,9 +2480,9 @@ def to_json(
             .. versionadded:: 1.2.0
 
         mode : str, default 'w' (writing)
-            Specify the IO mode for output when supplying a path_or_buf. 
-            Accepted args are 'w' (writing) and 'a' (append) only. 
-            Supplying mode='a' is only supported when lines is True and orient is 'records'.
+            Specify the IO mode for output when supplying a path_or_buf.
+            Accepted args are 'w' (writing) and 'a' (append) only.
+            mode='a' is only supported when lines is True and orient is 'records'.
 
         Returns
         -------
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -136,7 +136,7 @@ def to_json(
     index: bool = True,
     indent: int = 0,
     storage_options: StorageOptions = None,
-    mode: str = 'w',
+    mode: str = "w",
 ) -> str | None:
 
     if not index and orient not in ["split", "table"]:
@@ -147,17 +147,16 @@ def to_json(
     if lines and orient != "records":
         raise ValueError("'lines' keyword only valid when 'orient' is records")
 
-    if mode not in ['a', 'w']:
+    if mode not in ["a", "w"]:
         raise ValueError(
             f"mode={mode!r} is not a valid option. Only 'w' and 'a' are currently supported."
         )
 
-    if mode == 'a' and (not lines or orient != 'records'):
+    if mode == "a" and (not lines or orient != "records"):
         raise ValueError(
             "mode='a' (append) is only supported when lines is True and orient is 'records'"
         )
 
-
     if orient == "table" and isinstance(obj, Series):
         obj = obj.to_frame(name=obj.name or "values")
 
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
@@ -297,3 +297,98 @@ def __iter__(self) -> Iterator:
     reader = MyReader(jsonl)
     assert len(list(read_json(reader, lines=True, chunksize=100))) > 1
     assert reader.read_count > 10
+
+
+@pytest.mark.parametrize("orient_", ["split", "index", "table"])
+def test_to_json_append_orient(orient_):
+    # GH 35849
+    # mode="a" must raise ValueError for any orient other than "records"
+    df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
+    msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'"
+    with pytest.raises(ValueError, match=msg):
+        df.to_json(mode="a", orient=orient_)
+
+
+def test_to_json_append_lines():
+    # GH 35849
+    # mode="a" must raise ValueError when lines=False, even with orient="records"
+    df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
+    msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'"
+    with pytest.raises(ValueError, match=msg):
+        df.to_json(mode="a", lines=False, orient="records")
+
+
+@pytest.mark.parametrize("mode_", ["r", "x"])
+def test_to_json_append_mode(mode_):
+    # GH 35849
+    # Any mode other than "w" or "a" must raise ValueError
+    df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
+    msg = f"mode={mode_!r} is not a valid option. Only 'w' and 'a' are currently supported."
+    with pytest.raises(ValueError, match=msg):
+        df.to_json(mode=mode_, lines=False, orient="records")
+
+
+def test_to_json_append_output():
+    # GH 35849
+    # Frames written to one file with mode="a" should read back as a single
+    # frame via read_json(lines=True), including when the appended frames
+    # do not share the same columns.
+    df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
+    df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})
+    df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})
+    df4 = DataFrame({"col4": [True, False]})
+
+    # Test 1, df1 and df2
+    expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]})
+    with tm.ensure_clean("test.json") as path:
+        # First write uses the default mode, the second one appends
+        df1.to_json(path, lines=True, orient="records")
+        df2.to_json(path, mode="a", lines=True, orient="records")
+
+        # Read the combined file back
+        result_df = read_json(path, lines=True)
+        tm.assert_frame_equal(result_df, expected)
+
+    # NOTE(review): Tests 2 and 3 spell missing values as None; confirm that
+    # read_json infers the same dtypes for these sparse columns.
+    # Test 2: df1, df2, df3, df4 (in that order)
+    expected = DataFrame(
+        {
+            "col1": [1, 2, 3, 4, None, None, None, None],
+            "col2": ["a", "b", "c", "d", "e", "f", None, None],
+            "col3": [None, None, None, None, "!", "#", None, None],
+            "col4": [None, None, None, None, None, None, True, False],
+        }
+    )
+    with tm.ensure_clean("test.json") as path:
+        # Every write appends, including the first one
+        df1.to_json(path, mode="a", lines=True, orient="records")
+        df2.to_json(path, mode="a", lines=True, orient="records")
+        df3.to_json(path, mode="a", lines=True, orient="records")
+        df4.to_json(path, mode="a", lines=True, orient="records")
+
+        # Read the combined file back
+        result_df = read_json(path, lines=True)
+        tm.assert_frame_equal(result_df, expected)
+
+    # Test 3: df4, df3, df2, df1 (in that order)
+    # expected lists its columns in the order they first appear in the file:
+    # col4 (df4), col2 and col3 (df3), then col1 (df2).
+    expected = DataFrame(
+        {
+            "col4": [True, False, None, None, None, None, None, None],
+            "col2": [None, None, "e", "f", "c", "d", "a", "b"],
+            "col3": [None, None, "!", "#", None, None, None, None],
+            "col1": [None, None, None, None, 3, 4, 1, 2],
+        }
+    )
+    with tm.ensure_clean("test.json") as path:
+        # Every write appends, including the first one
+        df4.to_json(path, mode="a", lines=True, orient="records")
+        df3.to_json(path, mode="a", lines=True, orient="records")
+        df2.to_json(path, mode="a", lines=True, orient="records")
+        df1.to_json(path, mode="a", lines=True, orient="records")
+
+        # Read the combined file back
+        result_df = read_json(path, lines=True)
+        tm.assert_frame_equal(result_df, expected)