Skip to content

Commit 0428534

Browse files
committed
Adding tests for append functionality, along with updated whatsnew, user_guide, and generic docstring.
1 parent 6f4d068 commit 0428534

File tree

5 files changed

+104
-8
lines changed

5 files changed

+104
-8
lines changed

doc/source/user_guide/io.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,6 +1910,7 @@ with optional parameters:
19101910
* ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'.
19111911
* ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object.
19121912
* ``lines`` : If ``records`` orient, then will write each record per line as json.
1913+
* ``mode`` : string, writer mode when writing to path. ``'w'`` for write, ``'a'`` for append. Default ``'w'``. ``'a'`` is only supported when ``lines`` is ``True`` and ``orient`` is ``'records'``.
19131914

19141915
Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters.
19151916

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ Other enhancements
333333
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
334334
- The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`)
335335
- The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`)
336+
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs ``'w'`` and ``'a'``. The default is ``'w'``; ``'a'`` can be used when ``lines=True`` and ``orient='records'`` to append record-oriented JSON lines to an existing JSON file (:issue:`35849`)
336337

337338
.. ---------------------------------------------------------------------------
338339
.. _whatsnew_150.notable_bug_fixes:

pandas/core/generic.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2400,7 +2400,7 @@ def to_json(
24002400
index: bool_t = True,
24012401
indent: int | None = None,
24022402
storage_options: StorageOptions = None,
2403-
mode: str = 'w',
2403+
mode: str = "w",
24042404
) -> str | None:
24052405
"""
24062406
Convert the object to a JSON string.
@@ -2480,9 +2480,9 @@ def to_json(
24802480
.. versionadded:: 1.2.0
24812481
24822482
mode : str, default 'w' (writing)
2483-
Specify the IO mode for output when supplying a path_or_buf.
2484-
Accepted args are 'w' (writing) and 'a' (append) only.
2485-
Supplying mode='a' is only supported when lines is True and orient is 'records'.
2483+
Specify the IO mode for output when supplying a path_or_buf.
2484+
Accepted args are 'w' (writing) and 'a' (append) only.
2485+
mode='a' is only supported when lines is True and orient is 'records'.
24862486
24872487
Returns
24882488
-------

pandas/io/json/_json.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def to_json(
136136
index: bool = True,
137137
indent: int = 0,
138138
storage_options: StorageOptions = None,
139-
mode: str = 'w',
139+
mode: str = "w",
140140
) -> str | None:
141141

142142
if not index and orient not in ["split", "table"]:
@@ -147,17 +147,16 @@ def to_json(
147147
if lines and orient != "records":
148148
raise ValueError("'lines' keyword only valid when 'orient' is records")
149149

150-
if mode not in ['a', 'w']:
150+
if mode not in ["a", "w"]:
151151
raise ValueError(
152152
f"mode={mode!r} is not a valid option. Only 'w' and 'a' are currently supported."
153153
)
154154

155-
if mode == 'a' and (not lines or orient != 'records'):
155+
if mode == "a" and (not lines or orient != "records"):
156156
raise ValueError(
157157
"mode='a' (append) is only supported when lines is True and orient is 'records'"
158158
)
159159

160-
161160
if orient == "table" and isinstance(obj, Series):
162161
obj = obj.to_frame(name=obj.name or "values")
163162

pandas/tests/io/json/test_readlines.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,98 @@ def __iter__(self) -> Iterator:
297297
reader = MyReader(jsonl)
298298
assert len(list(read_json(reader, lines=True, chunksize=100))) > 1
299299
assert reader.read_count > 10
300+
301+
302+
@pytest.mark.parametrize("orient_", ["split", "index", "table"])
def test_to_json_append_orient(orient_):
    """
    Check that append mode is rejected for orients other than 'records'.

    GH 35849. ``lines`` is not passed here; only ``mode`` and ``orient`` are
    supplied to ``to_json``.
    """
    frame = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    # Parentheses are escaped because ``match`` treats the message as a regex.
    expected_error = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'"
    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(mode="a", orient=orient_)
310+
311+
312+
def test_to_json_append_lines():
    """
    Check that append mode is rejected when ``lines`` is False.

    GH 35849. ``orient`` is 'records', so ``lines=False`` is the only
    argument that violates the append requirements.
    """
    # Parentheses are escaped because ``match`` treats the message as a regex.
    expected_error = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'"
    frame = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(mode="a", lines=False, orient="records")
319+
320+
321+
@pytest.mark.parametrize("mode_", ["r", "x"])
def test_to_json_append_mode(mode_):
    """
    Check that a ``mode`` other than 'w' or 'a' raises ``ValueError``.

    GH 35849. This test was originally also named
    ``test_to_json_append_lines``; reusing that name rebinds the module
    attribute, so the earlier test of the same name was never collected.
    It is renamed here so both tests run.
    """
    df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    # Same message text as before, wrapped to stay within the line limit.
    msg = (
        f"mode={mode_!r} is not a valid option. "
        "Only 'w' and 'a' are currently supported."
    )
    with pytest.raises(ValueError, match=msg):
        df.to_json(mode=mode_, lines=False, orient="records")
329+
330+
331+
def test_to_json_append_output():
    """
    Check that frames appended with ``mode="a"`` read back as one frame.

    GH 35849. Three scenarios are covered: two frames with identical
    columns, four frames with differing columns, and the same four frames
    written in reverse order.

    The function now carries the ``test_`` prefix so that pytest collects
    it, compares ``result_df`` (the name actually assigned) instead of the
    undefined ``result``, and uses ``"col1"`` for the last expected column
    of the third scenario instead of a duplicated ``"col4"`` key.
    """
    # NOTE(review): the expected frames below keep the values from the
    # original test. Whether read_json infers the same dtypes for the
    # sparsely populated columns has not been verified - confirm when this
    # test is first run.
    df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})
    df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})
    df4 = DataFrame({"col4": [True, False]})

    # Test 1: df1 written normally, df2 appended
    expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]})
    with tm.ensure_clean("test.json") as path:
        # Save dataframes to the same file
        df1.to_json(path, lines=True, orient="records")
        df2.to_json(path, mode="a", lines=True, orient="records")

        # Read path file
        result_df = read_json(path, lines=True)
        tm.assert_frame_equal(result_df, expected)

    # Test 2: df1, df2, df3, df4 (in that order), all appended
    expected = DataFrame(
        {
            "col1": [1, 2, 3, 4, None, None, None, None],
            "col2": ["a", "b", "c", "d", "e", "f", None, None],
            "col3": [None, None, None, None, "!", "#", None, None],
            "col4": [None, None, None, None, None, None, True, False],
        }
    )
    with tm.ensure_clean("test.json") as path:
        # Save dataframes to the same file
        df1.to_json(path, mode="a", lines=True, orient="records")
        df2.to_json(path, mode="a", lines=True, orient="records")
        df3.to_json(path, mode="a", lines=True, orient="records")
        df4.to_json(path, mode="a", lines=True, orient="records")

        # Read path file
        result_df = read_json(path, lines=True)
        tm.assert_frame_equal(result_df, expected)

    # Test 3: df4, df3, df2, df1 (in that order), all appended
    expected = DataFrame(
        {
            "col4": [True, False, None, None, None, None, None, None],
            "col2": [None, None, "e", "f", "c", "d", "a", "b"],
            "col3": [None, None, "!", "#", None, None, None, None],
            # Was a second "col4" key, which silently replaced the first one.
            "col1": [None, None, None, None, 3, 4, 1, 2],
        }
    )
    with tm.ensure_clean("test.json") as path:
        # Save dataframes to the same file
        df4.to_json(path, mode="a", lines=True, orient="records")
        df3.to_json(path, mode="a", lines=True, orient="records")
        df2.to_json(path, mode="a", lines=True, orient="records")
        df1.to_json(path, mode="a", lines=True, orient="records")

        # Read path file
        result_df = read_json(path, lines=True)
        tm.assert_frame_equal(result_df, expected)

0 commit comments

Comments
 (0)