From 61818d255d85cd1f1ef1a730980d067ee2ec8e6e Mon Sep 17 00:00:00 2001 From: Eric Lopes Date: Fri, 21 Aug 2020 02:59:01 +0800 Subject: [PATCH 1/8] adding append mode to 'jsonlines' mode of to_json --- pandas/core/generic.py | 2 ++ pandas/io/json/_json.py | 6 +++++- pandas/io/json/_normalize.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fe412bc0ce937..2240123e1c166 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2063,6 +2063,7 @@ def to_json( index: bool_t = True, indent: Optional[int] = None, storage_options: StorageOptions = None, + mode: str = 'w' ) -> Optional[str]: """ Convert the object to a JSON string. @@ -2335,6 +2336,7 @@ def to_json( index=index, indent=indent, storage_options=storage_options, + mode=mode ) def to_hdf( diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index fe5e172655ae1..01562abb8d4f8 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -50,6 +50,7 @@ def to_json( index: bool = True, indent: int = 0, storage_options: StorageOptions = None, + mode: str = 'w' ): if not index and orient not in ["split", "table"]: @@ -68,6 +69,9 @@ def to_json( if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") + if mode == "a" and (not lines or orient!='records'): + raise ValueError("'append mode' only valid when 'line' is True and 'orient' is records") + if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") @@ -97,7 +101,7 @@ def to_json( s = convert_to_line_delimits(s) if isinstance(path_or_buf, str): - fh, handles = get_handle(path_or_buf, "w", compression=compression) + fh, handles = get_handle(path_or_buf, mode, compression=compression) try: fh.write(s) finally: diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 44765dbe74b46..da7ba98ac5c72 100644 --- a/pandas/io/json/_normalize.py +++ 
b/pandas/io/json/_normalize.py @@ -23,7 +23,7 @@ def convert_to_line_delimits(s): # json object, only lists can if not s[0] == "[" and s[-1] == "]": return s - s = s[1:-1] + s = s[1:-1]+"\n" return convert_json_to_lines(s) From e5c326f41944174fdc43f2f32dc00cdb5d07f17c Mon Sep 17 00:00:00 2001 From: Eric Lopes Date: Fri, 21 Aug 2020 03:06:42 +0800 Subject: [PATCH 2/8] running black and flake8 --- pandas/core/generic.py | 4 ++-- pandas/io/json/_json.py | 8 +++++--- pandas/io/json/_normalize.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2240123e1c166..804b5a97a430e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2063,7 +2063,7 @@ def to_json( index: bool_t = True, indent: Optional[int] = None, storage_options: StorageOptions = None, - mode: str = 'w' + mode: str = "w", ) -> Optional[str]: """ Convert the object to a JSON string. @@ -2336,7 +2336,7 @@ def to_json( index=index, indent=indent, storage_options=storage_options, - mode=mode + mode=mode, ) def to_hdf( diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 01562abb8d4f8..12f3e243f5419 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -50,7 +50,7 @@ def to_json( index: bool = True, indent: int = 0, storage_options: StorageOptions = None, - mode: str = 'w' + mode: str = "w", ): if not index and orient not in ["split", "table"]: @@ -69,8 +69,10 @@ def to_json( if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") - if mode == "a" and (not lines or orient!='records'): - raise ValueError("'append mode' only valid when 'line' is True and 'orient' is records") + if mode == "a" and (not lines or orient != "records"): + raise ValueError( + "'append mode' only valid when 'line' is True and 'orient' is records" + ) if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") diff --git 
a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index da7ba98ac5c72..a40668f982972 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -23,7 +23,7 @@ def convert_to_line_delimits(s): # json object, only lists can if not s[0] == "[" and s[-1] == "]": return s - s = s[1:-1]+"\n" + s = s[1:-1] + "\n" return convert_json_to_lines(s) From 9007656859326dfb882d76ce26a8fead443b3173 Mon Sep 17 00:00:00 2001 From: Eric Lopes Date: Fri, 21 Aug 2020 03:38:45 +0800 Subject: [PATCH 3/8] handle new line only on existing file for append mode --- pandas/io/json/_json.py | 7 +++++++ pandas/io/json/_normalize.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 12f3e243f5419..14dd82e94f115 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -100,7 +100,14 @@ def to_json( ).write() if lines: + exists = False + try: + exists = os.path.exists(path_or_buf) + except (TypeError, ValueError): + pass + s = convert_to_line_delimits(s) + s = "\n" + s if exists else s if isinstance(path_or_buf, str): fh, handles = get_handle(path_or_buf, mode, compression=compression) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index a40668f982972..44765dbe74b46 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -23,7 +23,7 @@ def convert_to_line_delimits(s): # json object, only lists can if not s[0] == "[" and s[-1] == "]": return s - s = s[1:-1] + "\n" + s = s[1:-1] return convert_json_to_lines(s) From e13c84b061fc23990136d2f2ca1499e3948ae751 Mon Sep 17 00:00:00 2001 From: Eric Lopes Date: Fri, 21 Aug 2020 04:06:30 +0800 Subject: [PATCH 4/8] check if file is not empty to add new line --- pandas/io/json/_json.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 14dd82e94f115..96e9476c1edbe 100644 --- a/pandas/io/json/_json.py +++ 
b/pandas/io/json/_json.py @@ -100,14 +100,17 @@ def to_json( ).write() if lines: - exists = False + s = convert_to_line_delimits(s) try: - exists = os.path.exists(path_or_buf) + exists = ( + os.path.exists(path_or_buf) + and os.path.isfile(path_or_buf) + and os.path.getsize(path_or_buf) + ) + s = "\n" + s if exists else s except (TypeError, ValueError): pass - s = convert_to_line_delimits(s) - s = "\n" + s if exists else s if isinstance(path_or_buf, str): fh, handles = get_handle(path_or_buf, mode, compression=compression) From 6d292f7e549621ee9a77b18e239ed20d2f0b46ba Mon Sep 17 00:00:00 2001 From: Eric Lopes Date: Fri, 21 Aug 2020 04:08:05 +0800 Subject: [PATCH 5/8] running black pandas --- pandas/io/json/_json.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 96e9476c1edbe..01d2e1b7b73d5 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -111,7 +111,6 @@ def to_json( except (TypeError, ValueError): pass - if isinstance(path_or_buf, str): fh, handles = get_handle(path_or_buf, mode, compression=compression) try: From 4357a695f261cbd0e30ca37c2789bb7ce7d70c36 Mon Sep 17 00:00:00 2001 From: Eric Lopes Date: Fri, 21 Aug 2020 04:42:26 +0800 Subject: [PATCH 6/8] new line on append mode only --- pandas/io/json/_json.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 01d2e1b7b73d5..627b8a48100a8 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -102,12 +102,13 @@ def to_json( if lines: s = convert_to_line_delimits(s) try: - exists = ( - os.path.exists(path_or_buf) + add_new_line = ( + mode == "a" + and os.path.exists(path_or_buf) and os.path.isfile(path_or_buf) and os.path.getsize(path_or_buf) ) - s = "\n" + s if exists else s + s = "\n" + s if add_new_line else s except (TypeError, ValueError): pass From bb76a385289f9f01ee627f55c29e0e2cb53962e7 Mon Sep 17 00:00:00 2001 From: Eric Lopes Date: Fri, 
21 Aug 2020 05:08:17 +0800 Subject: [PATCH 7/8] adding documentation --- pandas/core/generic.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 804b5a97a430e..987b84b2f729c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2157,6 +2157,14 @@ def to_json( .. versionadded:: 1.2.0 + mode : str, default 'w' + File mode used when writing to a path: 'w' overwrites the file and 'a' + appends to it. Append mode is only supported when `orient` is 'records' + and `lines` is True; any other combination raises a ValueError. + + .. versionadded:: 1.2.0 + + Returns ------- None or str From dc5c1f5e3b85239bc8507cfb4b0d0a7f5f250bb5 Mon Sep 17 00:00:00 2001 From: Eric Lopes Date: Fri, 21 Aug 2020 05:36:58 +0800 Subject: [PATCH 8/8] fix docstring validation --- pandas/core/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 987b84b2f729c..c3c4a14146541 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2164,7 +2164,6 @@ def to_json( .. versionadded:: 1.2.0 - Returns ------- None or str