Skip to content

Commit b5d049c

Browse files
authored
Merge pull request #1 from SFuller4/to-json-append-mode
To json append mode
2 parents 8f21b97 + 6d1499a commit b5d049c

File tree

5 files changed

+123
-1
lines changed

5 files changed

+123
-1
lines changed

doc/source/user_guide/io.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,6 +1910,7 @@ with optional parameters:
19101910
* ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'.
19111911
* ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object.
19121912
* ``lines`` : If ``orient`` is ``records``, write each record on its own line as JSON.
1913+
* ``mode`` : string, writer mode when writing to path. 'w' for write, 'a' for append (only supported when ``lines`` is True and ``orient`` is 'records'). Default 'w'.
19131914

19141915
Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters.
19151916

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ Other enhancements
333333
- :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`)
334334
- The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`)
335335
- The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`)
336+
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword that accepts ``'w'`` (the default) and ``'a'``. ``mode='a'`` can be used when ``lines=True`` and ``orient='records'`` to append record-oriented JSON lines to an existing JSON file (:issue:`35849`)
336337

337338
.. ---------------------------------------------------------------------------
338339
.. _whatsnew_150.notable_bug_fixes:

pandas/core/generic.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2400,6 +2400,7 @@ def to_json(
24002400
index: bool_t = True,
24012401
indent: int | None = None,
24022402
storage_options: StorageOptions = None,
2403+
mode: str = "w",
24032404
) -> str | None:
24042405
"""
24052406
Convert the object to a JSON string.
@@ -2478,6 +2479,11 @@ def to_json(
24782479
24792480
.. versionadded:: 1.2.0
24802481
2482+
mode : str, default 'w' (writing)
2483+
Specify the IO mode for output when supplying a path_or_buf.
2484+
Accepted args are 'w' (writing) and 'a' (append) only.
2485+
mode='a' is only supported when lines is True and orient is 'records'.
2486+
24812487
Returns
24822488
-------
24832489
None or str
@@ -2661,6 +2667,7 @@ def to_json(
26612667
index=index,
26622668
indent=indent,
26632669
storage_options=storage_options,
2670+
mode=mode,
26642671
)
26652672

26662673
@final

pandas/io/json/_json.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ def to_json(
136136
index: bool = True,
137137
indent: int = 0,
138138
storage_options: StorageOptions = None,
139+
mode: str = "w",
139140
) -> str | None:
140141

141142
if not index and orient not in ["split", "table"]:
@@ -146,6 +147,20 @@ def to_json(
146147
if lines and orient != "records":
147148
raise ValueError("'lines' keyword only valid when 'orient' is records")
148149

150+
if mode not in ["a", "w"]:
151+
msg = (
152+
f"mode={repr(mode)} is not a valid option."
153+
"Only 'w' and 'a' are currently supported."
154+
)
155+
raise ValueError(msg)
156+
157+
if mode == "a" and (not lines or orient != "records"):
158+
msg = (
159+
"mode='a' (append) is only supported when"
160+
"lines is True and orient is 'records'"
161+
)
162+
raise ValueError(msg)
163+
149164
if orient == "table" and isinstance(obj, Series):
150165
obj = obj.to_frame(name=obj.name or "values")
151166

@@ -177,7 +192,7 @@ def to_json(
177192
if path_or_buf is not None:
178193
# apply compression and byte/text conversion
179194
with get_handle(
180-
path_or_buf, "w", compression=compression, storage_options=storage_options
195+
path_or_buf, mode, compression=compression, storage_options=storage_options
181196
) as handles:
182197
handles.handle.write(s)
183198
else:

pandas/tests/io/json/test_readlines.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,101 @@ def __iter__(self) -> Iterator:
297297
reader = MyReader(jsonl)
298298
assert len(list(read_json(reader, lines=True, chunksize=100))) > 1
299299
assert reader.read_count > 10
300+
301+
302+
@pytest.mark.parametrize("orient_", ["split", "index", "table"])
def test_to_json_append_orient(orient_):
    """Appending with an orient other than 'records' raises ValueError (GH 35849)."""
    frame = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    # The pattern is assembled from two adjacent literals with no separating
    # space, so it matches "...supported whenlines is True...".
    expected_error = (
        r"mode='a' \(append\) is only supported when"
        "lines is True and orient is 'records'"
    )
    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(mode="a", orient=orient_)
313+
314+
315+
def test_to_json_append_lines():
    """Appending with ``lines=False`` raises ValueError (GH 35849)."""
    frame = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    # The pattern is assembled from two adjacent literals with no separating
    # space, so it matches "...supported whenlines is True...".
    expected_error = (
        r"mode='a' \(append\) is only supported when"
        "lines is True and orient is 'records'"
    )
    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(mode="a", lines=False, orient="records")
325+
326+
327+
@pytest.mark.parametrize("mode_", ["r", "x"])
def test_to_json_append_mode(mode_):
    """A ``mode`` other than 'w' or 'a' raises ValueError (GH 35849)."""
    frame = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    # ``!r`` renders the same text as ``repr(mode_)``. The two literals are
    # joined without a space; the unescaped "." in the pattern matches any
    # single character, including the literal period.
    expected_error = (
        f"mode={mode_!r} is not a valid option."
        "Only 'w' and 'a' are currently supported."
    )
    with pytest.raises(ValueError, match=expected_error):
        frame.to_json(mode=mode_, lines=False, orient="records")
338+
339+
340+
def test_to_json_append_output():
    """
    Check that JSON-lines files built up with ``mode="a"`` read back correctly.

    GH 35849. The function carries the ``test_`` prefix so that pytest's
    default discovery collects it; without the prefix it is never run.

    Three scenarios are covered:

    1. same columns, second frame appended to a freshly written file;
    2. four frames with differing columns appended in order df1..df4;
    3. the same four frames appended in reverse order (df4..df1).
    """
    # NOTE(review): the expectations below assume that ``read_json`` fills
    # keys missing from a record with NaN and converts a bool column that
    # has gaps to float64 (True/False -> 1.0/0.0). Confirm against the
    # pandas version under test.
    nan = float("nan")

    df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
    df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})
    df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})
    df4 = DataFrame({"col4": [True, False]})

    def _write_then_read(frames_and_modes):
        # Write every frame to one temporary file, then parse it as JSON lines.
        with tm.ensure_clean("test.json") as path:
            for frame, mode in frames_and_modes:
                frame.to_json(path, mode=mode, lines=True, orient="records")
            return read_json(path, lines=True)

    # Scenario 1: df1 written normally, df2 appended.
    expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]})
    result = _write_then_read([(df1, "w"), (df2, "a")])
    tm.assert_frame_equal(result, expected)

    # Scenario 2: df1, df2, df3, df4 (in that order), all appended.
    expected = DataFrame(
        {
            "col1": [1, 2, 3, 4, None, None, None, None],
            "col2": ["a", "b", "c", "d", "e", "f", nan, nan],
            "col3": [nan, nan, nan, nan, "!", "#", nan, nan],
            "col4": [None, None, None, None, None, None, True, False],
        }
    ).astype({"col4": "float"})
    result = _write_then_read([(df1, "a"), (df2, "a"), (df3, "a"), (df4, "a")])
    tm.assert_frame_equal(result, expected)

    # Scenario 3: df4, df3, df2, df1 (in that order), all appended.
    # Columns are listed in the order their keys first appear in the file.
    expected = DataFrame(
        {
            "col4": [True, False, None, None, None, None, None, None],
            "col2": [nan, nan, "e", "f", "c", "d", "a", "b"],
            "col3": [nan, nan, "!", "#", nan, nan, nan, nan],
            "col1": [None, None, None, None, 3, 4, 1, 2],
        }
    ).astype({"col4": "float"})
    result = _write_then_read([(df4, "a"), (df3, "a"), (df2, "a"), (df1, "a")])
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)