From 6f4d068135164e484f51f15ab364e64f5bdbeb27 Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Mon, 12 Sep 2022 23:08:37 -0500 Subject: [PATCH 01/16] Adding functionality for mode='a' when saving DataFrame.to_json. Only supported when lines=True and orient='records'. --- pandas/core/generic.py | 7 +++++++ pandas/io/json/_json.py | 14 +++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index aa9845a2abb78..f89f383a1c7b4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2400,6 +2400,7 @@ def to_json( index: bool_t = True, indent: int | None = None, storage_options: StorageOptions = None, + mode: str = 'w', ) -> str | None: """ Convert the object to a JSON string. @@ -2478,6 +2479,11 @@ def to_json( .. versionadded:: 1.2.0 + mode : str, default 'w' (writing) + Specify the IO mode for output when supplying a path_or_buf. + Accepted args are 'w' (writing) and 'a' (append) only. + Supplying mode='a' is only supported when lines is True and orient is 'records'. + Returns ------- None or str @@ -2661,6 +2667,7 @@ def to_json( index=index, indent=indent, storage_options=storage_options, + mode=mode, ) @final diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 701f72f605989..579546782bc85 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -136,6 +136,7 @@ def to_json( index: bool = True, indent: int = 0, storage_options: StorageOptions = None, + mode: str = 'w', ) -> str | None: if not index and orient not in ["split", "table"]: @@ -146,6 +147,17 @@ def to_json( if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") + if mode not in ['a', 'w']: + raise ValueError( + f"mode={mode!r} is not a valid option. Only 'w' and 'a' are currently supported." 
+ ) + + if mode == 'a' and (not lines or orient != 'records'): + raise ValueError( + "mode='a' (append) is only supported when lines is True and orient is 'records'" + ) + + if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") @@ -177,7 +189,7 @@ def to_json( if path_or_buf is not None: # apply compression and byte/text conversion with get_handle( - path_or_buf, "w", compression=compression, storage_options=storage_options + path_or_buf, mode, compression=compression, storage_options=storage_options ) as handles: handles.handle.write(s) else: From 042853436f08e28060157bf741c689403ff5454d Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Tue, 13 Sep 2022 22:02:29 -0500 Subject: [PATCH 02/16] Adding tests for append functionality, along with updated whatsnew, user_guide, and generic docstring. --- doc/source/user_guide/io.rst | 1 + doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/generic.py | 8 +-- pandas/io/json/_json.py | 7 +- pandas/tests/io/json/test_readlines.py | 95 ++++++++++++++++++++++++++ 5 files changed, 104 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 15b3b894c68b6..24e2db01c063e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1910,6 +1910,7 @@ with optional parameters: * ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'. * ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object. * ``lines`` : If ``records`` orient, then will write each record per line as json. +* ``mode`` : string, writer mode when writing to path. 'w' for write, 'a' for append. 
Default 'w' Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters. diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d8a319da2065e..0841e9c22edd7 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -333,6 +333,7 @@ Other enhancements - :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`) - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) - The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`) +- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f89f383a1c7b4..f615381a1aee8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2400,7 +2400,7 @@ def to_json( index: bool_t = True, indent: int | None = None, storage_options: StorageOptions = None, - mode: str = 'w', + mode: str = "w", ) -> str | None: """ Convert the object to a JSON string. @@ -2480,9 +2480,9 @@ def to_json( .. versionadded:: 1.2.0 mode : str, default 'w' (writing) - Specify the IO mode for output when supplying a path_or_buf. - Accepted args are 'w' (writing) and 'a' (append) only. - Supplying mode='a' is only supported when lines is True and orient is 'records'. + Specify the IO mode for output when supplying a path_or_buf. 
+ Accepted args are 'w' (writing) and 'a' (append) only. + mode='a' is only supported when lines is True and orient is 'records'. Returns ------- diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 579546782bc85..8f7277d1bb9c0 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -136,7 +136,7 @@ def to_json( index: bool = True, indent: int = 0, storage_options: StorageOptions = None, - mode: str = 'w', + mode: str = "w", ) -> str | None: if not index and orient not in ["split", "table"]: @@ -147,17 +147,16 @@ def to_json( if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") - if mode not in ['a', 'w']: + if mode not in ["a", "w"]: raise ValueError( f"mode={mode!r} is not a valid option. Only 'w' and 'a' are currently supported." ) - if mode == 'a' and (not lines or orient != 'records'): + if mode == "a" and (not lines or orient != "records"): raise ValueError( "mode='a' (append) is only supported when lines is True and orient is 'records'" ) - if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index b371990178d28..796c2813811a6 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -297,3 +297,98 @@ def __iter__(self) -> Iterator: reader = MyReader(jsonl) assert len(list(read_json(reader, lines=True, chunksize=100))) > 1 assert reader.read_count > 10 + + +@pytest.mark.parametrize("orient_", ["split", "index", "table"]) +def test_to_json_append_orient(orient_): + # GH 35849 + # Test ValueError when orient is not 'records' + df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'" + with pytest.raises(ValueError, match=msg): + df.to_json(mode="a", orient=orient_) + + +def test_to_json_append_lines(): + # GH 35849 + 
# Test ValueError when lines is not True + df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'" + with pytest.raises(ValueError, match=msg): + df.to_json(mode="a", lines=False, orient="records") + + +@pytest.mark.parametrize("mode_", ['r', 'x']) +def test_to_json_append_lines(mode_): + # GH 35849 + # Test ValueError when mode is not supported option + df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + msg = f"mode={mode_!r} is not a valid option. Only 'w' and 'a' are currently supported." + with pytest.raises(ValueError, match=msg): + df.to_json(mode=mode_, lines=False, orient="records") + + +def to_json_append_output(): + # GH 35849 + # Testing that resulting outputs read in as expected. + df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]}) + df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) + df4 = DataFrame({"col4": [True, False]}) + + # Test 1, df1 and df2 + expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}) + with tm.ensure_clean("test.json") as path: + # Save dataframes to the same file + df1.to_json(path, lines=True, orient="records") + df2.to_json(path, mode="a", lines=True, orient="records") + + # Read path file + result_df = read_json(path, lines=True) + tm.assert_frame_equal(result, expected) + del expected + del result_df + + # Test 2: df1, df2, df3, df4 (in that order) + expected = DataFrame( + { + "col1": [1, 2, 3, 4, None, None, None, None], + "col2": ["a", "b", "c", "d", "e", "f", None, None], + "col3": [None, None, None, None, "!", "#", None, None], + "col4": [None, None, None, None, None, None, True, False], + } + ) + with tm.ensure_clean("test.json") as path: + # Save dataframes to the same file + df1.to_json(path, mode="a", lines=True, orient="records") + df2.to_json(path, mode="a", lines=True, orient="records") + df3.to_json(path, mode="a", lines=True, 
orient="records") + df4.to_json(path, mode="a", lines=True, orient="records") + + # Read path file + result_df = read_json(path, lines=True) + tm.assert_frame_equal(result, expected) + del expected + del result_df + + # Test 3: df4, df3, df2, df1 (in that order) + expected = DataFrame( + { + "col4": [True, False, None, None, None, None, None, None], + "col2": [None, None, "e", "f", "c", "d", "a", "b"], + "col3": [None, None, "!", "#", None, None, None, None], + "col4": [None, None, None, None, 3, 4, 1, 2], + } + ) + with tm.ensure_clean("test.json") as path: + # Save dataframes to the same file + df4.to_json(path, mode="a", lines=True, orient="records") + df3.to_json(path, mode="a", lines=True, orient="records") + df2.to_json(path, mode="a", lines=True, orient="records") + df1.to_json(path, mode="a", lines=True, orient="records") + + # Read path file + result_df = read_json(path, lines=True) + tm.assert_frame_equal(result, expected) + del expected + del result_df From 6d1499a52812a482c8e900fb5e2bb8afed6536c1 Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Tue, 13 Sep 2022 22:24:31 -0500 Subject: [PATCH 03/16] pre-commit adjustments --- pandas/io/json/_json.py | 12 ++++++---- pandas/tests/io/json/test_readlines.py | 33 ++++++++++++++------------ 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 8f7277d1bb9c0..f5748cf677b81 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -148,14 +148,18 @@ def to_json( raise ValueError("'lines' keyword only valid when 'orient' is records") if mode not in ["a", "w"]: - raise ValueError( - f"mode={mode!r} is not a valid option. Only 'w' and 'a' are currently supported." + msg = ( + f"mode={repr(mode)} is not a valid option." + "Only 'w' and 'a' are currently supported." 
) + raise ValueError(msg) if mode == "a" and (not lines or orient != "records"): - raise ValueError( - "mode='a' (append) is only supported when lines is True and orient is 'records'" + msg = ( + "mode='a' (append) is only supported when" + "lines is True and orient is 'records'" ) + raise ValueError(msg) if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 796c2813811a6..cdc4a536af707 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -304,7 +304,10 @@ def test_to_json_append_orient(orient_): # GH 35849 # Test ValueError when orient is not 'records' df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) - msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'" + msg = ( + r"mode='a' \(append\) is only supported when" + "lines is True and orient is 'records'" + ) with pytest.raises(ValueError, match=msg): df.to_json(mode="a", orient=orient_) @@ -313,17 +316,23 @@ def test_to_json_append_lines(): # GH 35849 # Test ValueError when lines is not True df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) - msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'" + msg = ( + r"mode='a' \(append\) is only supported when" + "lines is True and orient is 'records'" + ) with pytest.raises(ValueError, match=msg): df.to_json(mode="a", lines=False, orient="records") -@pytest.mark.parametrize("mode_", ['r', 'x']) -def test_to_json_append_lines(mode_): +@pytest.mark.parametrize("mode_", ["r", "x"]) +def test_to_json_append_mode(mode_): # GH 35849 # Test ValueError when mode is not supported option df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) - msg = f"mode={mode_!r} is not a valid option. Only 'w' and 'a' are currently supported." + msg = ( + f"mode={repr(mode_)} is not a valid option." 
+ "Only 'w' and 'a' are currently supported." + ) with pytest.raises(ValueError, match=msg): df.to_json(mode=mode_, lines=False, orient="records") @@ -344,10 +353,8 @@ def to_json_append_output(): df2.to_json(path, mode="a", lines=True, orient="records") # Read path file - result_df = read_json(path, lines=True) + result = read_json(path, lines=True) tm.assert_frame_equal(result, expected) - del expected - del result_df # Test 2: df1, df2, df3, df4 (in that order) expected = DataFrame( @@ -366,10 +373,8 @@ def to_json_append_output(): df4.to_json(path, mode="a", lines=True, orient="records") # Read path file - result_df = read_json(path, lines=True) + result = read_json(path, lines=True) tm.assert_frame_equal(result, expected) - del expected - del result_df # Test 3: df4, df3, df2, df1 (in that order) expected = DataFrame( @@ -377,7 +382,7 @@ def to_json_append_output(): "col4": [True, False, None, None, None, None, None, None], "col2": [None, None, "e", "f", "c", "d", "a", "b"], "col3": [None, None, "!", "#", None, None, None, None], - "col4": [None, None, None, None, 3, 4, 1, 2], + "col1": [None, None, None, None, 3, 4, 1, 2], } ) with tm.ensure_clean("test.json") as path: @@ -388,7 +393,5 @@ def to_json_append_output(): df1.to_json(path, mode="a", lines=True, orient="records") # Read path file - result_df = read_json(path, lines=True) + result = read_json(path, lines=True) tm.assert_frame_equal(result, expected) - del expected - del result_df From 381dce3ecfaf013ca30aa175408a5c594ad99958 Mon Sep 17 00:00:00 2001 From: SFuller4 <39442631+SFuller4@users.noreply.github.com> Date: Wed, 14 Sep 2022 12:58:24 -0500 Subject: [PATCH 04/16] Update pandas/io/json/_json.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/json/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f5748cf677b81..22402eb901ed8 100644 --- a/pandas/io/json/_json.py +++ 
b/pandas/io/json/_json.py @@ -149,7 +149,7 @@ def to_json( if mode not in ["a", "w"]: msg = ( - f"mode={repr(mode)} is not a valid option." + f"mode={mode} is not a valid option." "Only 'w' and 'a' are currently supported." ) raise ValueError(msg) From 50b29e7a3a7cb6d384d2535b846e98b22ab255d3 Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Wed, 14 Sep 2022 13:23:49 -0500 Subject: [PATCH 05/16] Fixing pytest cases per request from mroeschke. Switching whatsnew version per request from mroeschke. --- doc/source/whatsnew/v1.5.0.rst | 1 - doc/source/whatsnew/v1.6.0.rst | 2 +- pandas/tests/io/json/test_readlines.py | 61 +++++++++++++++++++++++--- 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 0841e9c22edd7..d8a319da2065e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -333,7 +333,6 @@ Other enhancements - :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`) - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) - The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`) -- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) .. --------------------------------------------------------------------------- .. 
_whatsnew_150.notable_bug_fixes: diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index aff950c6933dd..d85a4c660d20f 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -31,7 +31,7 @@ Other enhancements - :meth:`.GroupBy.quantile` now preserving nullable dtypes instead of casting to numpy dtypes (:issue:`37493`) - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) -- +- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) .. --------------------------------------------------------------------------- .. _whatsnew_160.notable_bug_fixes: diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index cdc4a536af707..fe860f59130f7 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -330,22 +330,20 @@ def test_to_json_append_mode(mode_): # Test ValueError when mode is not supported option df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) msg = ( - f"mode={repr(mode_)} is not a valid option." + f"mode={mode_} is not a valid option." "Only 'w' and 'a' are currently supported." ) with pytest.raises(ValueError, match=msg): df.to_json(mode=mode_, lines=False, orient="records") -def to_json_append_output(): +def to_json_append_output_consistent_columns(): # GH 35849 - # Testing that resulting outputs read in as expected. + # Testing that resulting output reads in as expected. 
+ # Testing same columns, new rows df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]}) - df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) - df4 = DataFrame({"col4": [True, False]}) - # Test 1, df1 and df2 expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}) with tm.ensure_clean("test.json") as path: # Save dataframes to the same file @@ -356,7 +354,46 @@ def to_json_append_output(): result = read_json(path, lines=True) tm.assert_frame_equal(result, expected) + +def to_json_append_output_inconsistent_columns(): + # GH 35849 + # Testing that resulting output reads in as expected. + # Testing one new column, one old column, new rows + df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) + # Test 2: df1, df2, df3, df4 (in that order) + expected = DataFrame( + { + "col1": [ + 1, + 2, + None, + None, + ], + "col2": ["a", "b", "e", "f"], + "col3": [None, None, "!", "#"], + } + ) + with tm.ensure_clean("test.json") as path: + # Save dataframes to the same file + df1.to_json(path, mode="a", lines=True, orient="records") + df3.to_json(path, mode="a", lines=True, orient="records") + + # Read path file + result = read_json(path, lines=True) + tm.assert_frame_equal(result, expected) + + +def to_json_append_output_different_columns(): + # GH 35849 + # Testing that resulting output reads in as expected. 
+ # Testing same, differing and new columns + df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]}) + df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) + df4 = DataFrame({"col4": [True, False]}) + expected = DataFrame( { "col1": [1, 2, 3, 4, None, None, None, None], @@ -376,7 +413,17 @@ def to_json_append_output(): result = read_json(path, lines=True) tm.assert_frame_equal(result, expected) - # Test 3: df4, df3, df2, df1 (in that order) + +def to_json_append_output_different_columns_reordered(): + # GH 35849 + # Testing that resulting output reads in as expected. + # Testing specific result column order. + df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]}) + df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) + df4 = DataFrame({"col4": [True, False]}) + + # df4, df3, df2, df1 (in that order) expected = DataFrame( { "col4": [True, False, None, None, None, None, None, None], From 09ac7f156f06efdd98228058c0830784645f36ef Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Wed, 14 Sep 2022 13:26:59 -0500 Subject: [PATCH 06/16] removed repr() from ValueError msg --- pandas/io/json/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f5748cf677b81..22402eb901ed8 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -149,7 +149,7 @@ def to_json( if mode not in ["a", "w"]: msg = ( - f"mode={repr(mode)} is not a valid option." + f"mode={mode} is not a valid option." "Only 'w' and 'a' are currently supported." 
) raise ValueError(msg) From a8cc86d269c785e49ecb7691f68fb471887394ff Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Wed, 14 Sep 2022 13:29:46 -0500 Subject: [PATCH 07/16] fixing bad formatting --- pandas/tests/io/json/test_readlines.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index fe860f59130f7..4b815203e2651 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -362,15 +362,9 @@ def to_json_append_output_inconsistent_columns(): df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) - # Test 2: df1, df2, df3, df4 (in that order) expected = DataFrame( { - "col1": [ - 1, - 2, - None, - None, - ], + "col1": [1, 2, None, None], "col2": ["a", "b", "e", "f"], "col3": [None, None, "!", "#"], } From 51c5c258f2f2cdb6ed54260f2092a32fd93f2055 Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Wed, 14 Sep 2022 19:46:21 -0500 Subject: [PATCH 08/16] Adjusting Typing from str to Literal["a", "w"] per request. 
--- pandas/io/json/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 22402eb901ed8..af57645a20421 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -136,7 +136,7 @@ def to_json( index: bool = True, indent: int = 0, storage_options: StorageOptions = None, - mode: str = "w", + mode: Literal["a", "w"] = "w", ) -> str | None: if not index and orient not in ["split", "table"]: From 29d8995a9089100816b13d4ca5a07fcdbeed1ab2 Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Mon, 17 Oct 2022 19:25:05 -0500 Subject: [PATCH 09/16] updating typing issues in the core file --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f615381a1aee8..6713d49869399 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2400,7 +2400,7 @@ def to_json( index: bool_t = True, indent: int | None = None, storage_options: StorageOptions = None, - mode: str = "w", + mode: Literal["a", "w"] = "w", ) -> str | None: """ Convert the object to a JSON string. From 2ead06ab0533980d1b289260d507f32c93d3a84a Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Mon, 12 Sep 2022 23:08:37 -0500 Subject: [PATCH 10/16] Adding functionality for mode='a' when saving DataFrame.to_json. Only supported when lines=True and orient='records'. --- pandas/core/generic.py | 7 +++++++ pandas/io/json/_json.py | 14 +++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0dccaad1441e4..087c641184d58 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2407,6 +2407,7 @@ def to_json( index: bool_t = True, indent: int | None = None, storage_options: StorageOptions = None, + mode: str = 'w', ) -> str | None: """ Convert the object to a JSON string. @@ -2485,6 +2486,11 @@ def to_json( .. 
versionadded:: 1.2.0 + mode : str, default 'w' (writing) + Specify the IO mode for output when supplying a path_or_buf. + Accepted args are 'w' (writing) and 'a' (append) only. + Supplying mode='a' is only supported when lines is True and orient is 'records'. + Returns ------- None or str @@ -2668,6 +2674,7 @@ def to_json( index=index, indent=indent, storage_options=storage_options, + mode=mode, ) @final diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 9b8364c449e36..2c4b2b69da2c5 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -136,6 +136,7 @@ def to_json( index: bool = True, indent: int = 0, storage_options: StorageOptions = None, + mode: str = 'w', ) -> str | None: if not index and orient not in ["split", "table"]: @@ -146,6 +147,17 @@ def to_json( if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") + if mode not in ['a', 'w']: + raise ValueError( + f"mode={mode!r} is not a valid option. Only 'w' and 'a' are currently supported." + ) + + if mode == 'a' and (not lines or orient != 'records'): + raise ValueError( + "mode='a' (append) is only supported when lines is True and orient is 'records'" + ) + + if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") @@ -177,7 +189,7 @@ def to_json( if path_or_buf is not None: # apply compression and byte/text conversion with get_handle( - path_or_buf, "w", compression=compression, storage_options=storage_options + path_or_buf, mode, compression=compression, storage_options=storage_options ) as handles: handles.handle.write(s) else: From 40fd626c06c9e259a3ba308d889aa315f5b81c61 Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Tue, 13 Sep 2022 22:02:29 -0500 Subject: [PATCH 11/16] Adding tests for append functionality, along with updated whatsnew, user_guide, and generic docstring. 
--- doc/source/user_guide/io.rst | 1 + doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/generic.py | 8 +-- pandas/io/json/_json.py | 7 +- pandas/tests/io/json/test_readlines.py | 95 ++++++++++++++++++++++++++ 5 files changed, 104 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 63e6b007f77a8..1d56b4fbd48b5 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1918,6 +1918,7 @@ with optional parameters: * ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'. * ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object. * ``lines`` : If ``records`` orient, then will write each record per line as json. +* ``mode`` : string, writer mode when writing to path. 'w' for write, 'a' for append. Default 'w' Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters. 
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 3d67f5c4818ad..86f3010cabb96 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -332,6 +332,7 @@ Other enhancements - Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`) - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) - The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`) +- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 087c641184d58..4c43e29932b88 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2407,7 +2407,7 @@ def to_json( index: bool_t = True, indent: int | None = None, storage_options: StorageOptions = None, - mode: str = 'w', + mode: str = "w", ) -> str | None: """ Convert the object to a JSON string. @@ -2487,9 +2487,9 @@ def to_json( .. versionadded:: 1.2.0 mode : str, default 'w' (writing) - Specify the IO mode for output when supplying a path_or_buf. - Accepted args are 'w' (writing) and 'a' (append) only. - Supplying mode='a' is only supported when lines is True and orient is 'records'. + Specify the IO mode for output when supplying a path_or_buf. + Accepted args are 'w' (writing) and 'a' (append) only. + mode='a' is only supported when lines is True and orient is 'records'. 
Returns ------- diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 2c4b2b69da2c5..71204b738c513 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -136,7 +136,7 @@ def to_json( index: bool = True, indent: int = 0, storage_options: StorageOptions = None, - mode: str = 'w', + mode: str = "w", ) -> str | None: if not index and orient not in ["split", "table"]: @@ -147,17 +147,16 @@ def to_json( if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") - if mode not in ['a', 'w']: + if mode not in ["a", "w"]: raise ValueError( f"mode={mode!r} is not a valid option. Only 'w' and 'a' are currently supported." ) - if mode == 'a' and (not lines or orient != 'records'): + if mode == "a" and (not lines or orient != "records"): raise ValueError( "mode='a' (append) is only supported when lines is True and orient is 'records'" ) - if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index b371990178d28..796c2813811a6 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -297,3 +297,98 @@ def __iter__(self) -> Iterator: reader = MyReader(jsonl) assert len(list(read_json(reader, lines=True, chunksize=100))) > 1 assert reader.read_count > 10 + + +@pytest.mark.parametrize("orient_", ["split", "index", "table"]) +def test_to_json_append_orient(orient_): + # GH 35849 + # Test ValueError when orient is not 'records' + df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'" + with pytest.raises(ValueError, match=msg): + df.to_json(mode="a", orient=orient_) + + +def test_to_json_append_lines(): + # GH 35849 + # Test ValueError when lines is not True + df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + msg = r"mode='a' \(append\) is 
only supported when lines is True and orient is 'records'" + with pytest.raises(ValueError, match=msg): + df.to_json(mode="a", lines=False, orient="records") + + +@pytest.mark.parametrize("mode_", ['r', 'x']) +def test_to_json_append_lines(mode_): + # GH 35849 + # Test ValueError when mode is not supported option + df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + msg = f"mode={mode_!r} is not a valid option. Only 'w' and 'a' are currently supported." + with pytest.raises(ValueError, match=msg): + df.to_json(mode=mode_, lines=False, orient="records") + + +def to_json_append_output(): + # GH 35849 + # Testing that resulting outputs read in as expected. + df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]}) + df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) + df4 = DataFrame({"col4": [True, False]}) + + # Test 1, df1 and df2 + expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}) + with tm.ensure_clean("test.json") as path: + # Save dataframes to the same file + df1.to_json(path, lines=True, orient="records") + df2.to_json(path, mode="a", lines=True, orient="records") + + # Read path file + result_df = read_json(path, lines=True) + tm.assert_frame_equal(result, expected) + del expected + del result_df + + # Test 2: df1, df2, df3, df4 (in that order) + expected = DataFrame( + { + "col1": [1, 2, 3, 4, None, None, None, None], + "col2": ["a", "b", "c", "d", "e", "f", None, None], + "col3": [None, None, None, None, "!", "#", None, None], + "col4": [None, None, None, None, None, None, True, False], + } + ) + with tm.ensure_clean("test.json") as path: + # Save dataframes to the same file + df1.to_json(path, mode="a", lines=True, orient="records") + df2.to_json(path, mode="a", lines=True, orient="records") + df3.to_json(path, mode="a", lines=True, orient="records") + df4.to_json(path, mode="a", lines=True, orient="records") + + # Read path file + result_df = read_json(path, 
lines=True) + tm.assert_frame_equal(result, expected) + del expected + del result_df + + # Test 3: df4, df3, df2, df1 (in that order) + expected = DataFrame( + { + "col4": [True, False, None, None, None, None, None, None], + "col2": [None, None, "e", "f", "c", "d", "a", "b"], + "col3": [None, None, "!", "#", None, None, None, None], + "col4": [None, None, None, None, 3, 4, 1, 2], + } + ) + with tm.ensure_clean("test.json") as path: + # Save dataframes to the same file + df4.to_json(path, mode="a", lines=True, orient="records") + df3.to_json(path, mode="a", lines=True, orient="records") + df2.to_json(path, mode="a", lines=True, orient="records") + df1.to_json(path, mode="a", lines=True, orient="records") + + # Read path file + result_df = read_json(path, lines=True) + tm.assert_frame_equal(result, expected) + del expected + del result_df From c62ec13e7648a2e7c6c8034ef5b67e9547f788e7 Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Tue, 13 Sep 2022 22:24:31 -0500 Subject: [PATCH 12/16] pre-commit adjustments --- pandas/io/json/_json.py | 12 ++++++---- pandas/tests/io/json/test_readlines.py | 33 ++++++++++++++------------ 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 71204b738c513..d2315c9fa9406 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -148,14 +148,18 @@ def to_json( raise ValueError("'lines' keyword only valid when 'orient' is records") if mode not in ["a", "w"]: - raise ValueError( - f"mode={mode!r} is not a valid option. Only 'w' and 'a' are currently supported." + msg = ( + f"mode={repr(mode)} is not a valid option." + "Only 'w' and 'a' are currently supported." 
) + raise ValueError(msg) if mode == "a" and (not lines or orient != "records"): - raise ValueError( - "mode='a' (append) is only supported when lines is True and orient is 'records'" + msg = ( + "mode='a' (append) is only supported when" + "lines is True and orient is 'records'" ) + raise ValueError(msg) if orient == "table" and isinstance(obj, Series): obj = obj.to_frame(name=obj.name or "values") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 796c2813811a6..cdc4a536af707 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -304,7 +304,10 @@ def test_to_json_append_orient(orient_): # GH 35849 # Test ValueError when orient is not 'records' df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) - msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'" + msg = ( + r"mode='a' \(append\) is only supported when" + "lines is True and orient is 'records'" + ) with pytest.raises(ValueError, match=msg): df.to_json(mode="a", orient=orient_) @@ -313,17 +316,23 @@ def test_to_json_append_lines(): # GH 35849 # Test ValueError when lines is not True df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) - msg = r"mode='a' \(append\) is only supported when lines is True and orient is 'records'" + msg = ( + r"mode='a' \(append\) is only supported when" + "lines is True and orient is 'records'" + ) with pytest.raises(ValueError, match=msg): df.to_json(mode="a", lines=False, orient="records") -@pytest.mark.parametrize("mode_", ['r', 'x']) -def test_to_json_append_lines(mode_): +@pytest.mark.parametrize("mode_", ["r", "x"]) +def test_to_json_append_mode(mode_): # GH 35849 # Test ValueError when mode is not supported option df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) - msg = f"mode={mode_!r} is not a valid option. Only 'w' and 'a' are currently supported." + msg = ( + f"mode={repr(mode_)} is not a valid option." 
+ "Only 'w' and 'a' are currently supported." + ) with pytest.raises(ValueError, match=msg): df.to_json(mode=mode_, lines=False, orient="records") @@ -344,10 +353,8 @@ def to_json_append_output(): df2.to_json(path, mode="a", lines=True, orient="records") # Read path file - result_df = read_json(path, lines=True) + result = read_json(path, lines=True) tm.assert_frame_equal(result, expected) - del expected - del result_df # Test 2: df1, df2, df3, df4 (in that order) expected = DataFrame( @@ -366,10 +373,8 @@ def to_json_append_output(): df4.to_json(path, mode="a", lines=True, orient="records") # Read path file - result_df = read_json(path, lines=True) + result = read_json(path, lines=True) tm.assert_frame_equal(result, expected) - del expected - del result_df # Test 3: df4, df3, df2, df1 (in that order) expected = DataFrame( @@ -377,7 +382,7 @@ def to_json_append_output(): "col4": [True, False, None, None, None, None, None, None], "col2": [None, None, "e", "f", "c", "d", "a", "b"], "col3": [None, None, "!", "#", None, None, None, None], - "col4": [None, None, None, None, 3, 4, 1, 2], + "col1": [None, None, None, None, 3, 4, 1, 2], } ) with tm.ensure_clean("test.json") as path: @@ -388,7 +393,5 @@ def to_json_append_output(): df1.to_json(path, mode="a", lines=True, orient="records") # Read path file - result_df = read_json(path, lines=True) + result = read_json(path, lines=True) tm.assert_frame_equal(result, expected) - del expected - del result_df From d36d07b427e3e24a77b5566f13ff2f8330457a54 Mon Sep 17 00:00:00 2001 From: SFuller4 <39442631+SFuller4@users.noreply.github.com> Date: Wed, 14 Sep 2022 12:58:24 -0500 Subject: [PATCH 13/16] Update pandas/io/json/_json.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/json/_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index d2315c9fa9406..0de7a5a2ef979 100644 --- a/pandas/io/json/_json.py +++ 
b/pandas/io/json/_json.py @@ -149,7 +149,7 @@ def to_json( if mode not in ["a", "w"]: msg = ( - f"mode={repr(mode)} is not a valid option." + f"mode={mode} is not a valid option." "Only 'w' and 'a' are currently supported." ) raise ValueError(msg) From 50ea8f80a86153912fdd59fea70bbb16c827fb87 Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Wed, 19 Oct 2022 18:05:50 -0500 Subject: [PATCH 14/16] fixing typing issues by adding mode to the overloads of to_json. Also removing whatsnew 1.6.0 --- doc/source/whatsnew/v1.6.0.rst | 234 --------------------------------- pandas/io/json/_json.py | 2 + 2 files changed, 2 insertions(+), 234 deletions(-) delete mode 100644 doc/source/whatsnew/v1.6.0.rst diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst deleted file mode 100644 index 420f732b8373e..0000000000000 --- a/doc/source/whatsnew/v1.6.0.rst +++ /dev/null @@ -1,234 +0,0 @@ -.. _whatsnew_160: - -What's new in 1.6.0 (??) ------------------------- - -These are the changes in pandas 1.6.0. See :ref:`release` for a full changelog -including other versions of pandas. - -{{ header }} - -.. --------------------------------------------------------------------------- -.. _whatsnew_160.enhancements: - -Enhancements -~~~~~~~~~~~~ - -.. _whatsnew_160.enhancements.enhancement1: - -enhancement1 -^^^^^^^^^^^^ - -.. _whatsnew_160.enhancements.enhancement2: - -enhancement2 -^^^^^^^^^^^^ - -.. _whatsnew_160.enhancements.other: - -Other enhancements -^^^^^^^^^^^^^^^^^^ -- :meth:`.GroupBy.quantile` now preserving nullable dtypes instead of casting to numpy dtypes (:issue:`37493`) -- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. 
If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) -- :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) -- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) -- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) - -.. --------------------------------------------------------------------------- -.. _whatsnew_160.notable_bug_fixes: - -Notable bug fixes -~~~~~~~~~~~~~~~~~ - -These are bug fixes that might have notable behavior changes. - -.. _whatsnew_160.notable_bug_fixes.notable_bug_fix1: - -notable_bug_fix1 -^^^^^^^^^^^^^^^^ - -.. _whatsnew_160.notable_bug_fixes.notable_bug_fix2: - -notable_bug_fix2 -^^^^^^^^^^^^^^^^ - -.. --------------------------------------------------------------------------- -.. _whatsnew_160.api_breaking: - -Backwards incompatible API changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. _whatsnew_160.api_breaking.deps: - -Increased minimum versions for dependencies -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Some minimum supported versions of dependencies were updated. -If installed, we now require: - -+-----------------+-----------------+----------+---------+ -| Package | Minimum Version | Required | Changed | -+=================+=================+==========+=========+ -| | | X | X | -+-----------------+-----------------+----------+---------+ - -For `optional libraries `_ the general recommendation is to use the latest version. -The following table lists the lowest version per library that is currently being tested throughout the development of pandas. -Optional libraries below the lowest tested version may still work, but are not considered supported. 
- -+-----------------+-----------------+---------+ -| Package | Minimum Version | Changed | -+=================+=================+=========+ -| | | X | -+-----------------+-----------------+---------+ - -See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. - -.. _whatsnew_160.api_breaking.other: - -Other API changes -^^^^^^^^^^^^^^^^^ -- -- - -.. --------------------------------------------------------------------------- -.. _whatsnew_160.deprecations: - -Deprecations -~~~~~~~~~~~~ -- -- - -.. --------------------------------------------------------------------------- -.. _whatsnew_160.performance: - -Performance improvements -~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvement in :meth:`.GroupBy.median` for nullable dtypes (:issue:`37493`) -- Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`) -- Performance improvement in :meth:`MultiIndex.union` without missing values and without duplicates (:issue:`48505`) -- Performance improvement in :meth:`.GroupBy.mean` and :meth:`.GroupBy.var` for extension array dtypes (:issue:`37493`) -- Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`) -- Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`) -- Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`) -- Performance improvement in ``var`` for nullable dtypes (:issue:`48379`). -- Performance improvement to :func:`read_sas` with ``blank_missing=True`` (:issue:`48502`) -- - -.. --------------------------------------------------------------------------- -.. 
_whatsnew_160.bug_fixes: - -Bug fixes -~~~~~~~~~ - -Categorical -^^^^^^^^^^^ -- -- - -Datetimelike -^^^^^^^^^^^^ -- Bug in :func:`pandas.infer_freq`, raising ``TypeError`` when inferred on :class:`RangeIndex` (:issue:`47084`) -- - -Timedelta -^^^^^^^^^ -- -- - -Timezones -^^^^^^^^^ -- -- - -Numeric -^^^^^^^ -- -- - -Conversion -^^^^^^^^^^ -- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`) -- - -Strings -^^^^^^^ -- -- - -Interval -^^^^^^^^ -- -- - -Indexing -^^^^^^^^ -- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`) -- Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) -- Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`) - -Missing -^^^^^^^ -- -- - -MultiIndex -^^^^^^^^^^ -- Bug in :meth:`MultiIndex.unique` losing extension array dtype (:issue:`48335`) -- Bug in :meth:`MultiIndex.union` losing extension array (:issue:`48498`, :issue:`48505`) -- Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`) -- - -I/O -^^^ -- -- - -Period -^^^^^^ -- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising ``UnicodeDecodeError`` when a locale-specific directive was passed (:issue:`46319`) -- - -Plotting -^^^^^^^^ -- -- - -Groupby/resample/rolling -^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :meth:`DataFrameGroupBy.sample` raises ``ValueError`` when the object is empty (:issue:`48459`) -- - -Reshaping -^^^^^^^^^ -- Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`) -- Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`) -- - -Sparse -^^^^^^ -- -- - 
-ExtensionArray -^^^^^^^^^^^^^^ -- Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`) -- - -Styler -^^^^^^ -- -- - -Other -^^^^^ - -.. ***DO NOT USE THIS SECTION*** - -- -- - -.. --------------------------------------------------------------------------- -.. _whatsnew_160.contributors: - -Contributors -~~~~~~~~~~~~ diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index c0ac490274738..5231fb2d39ca3 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -99,6 +99,7 @@ def to_json( index: bool = ..., indent: int = ..., storage_options: StorageOptions = ..., + mode: Literal["a", "w"] = ..., ) -> None: ... @@ -118,6 +119,7 @@ def to_json( index: bool = ..., indent: int = ..., storage_options: StorageOptions = ..., + mode: Literal["a", "w"] = ..., ) -> str: ... From 60887bc59813399f518b636e7402cd727c64589a Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Wed, 19 Oct 2022 19:01:04 -0500 Subject: [PATCH 15/16] moving enhancement information to whatsnew/v2.0.0 --- doc/source/whatsnew/v1.5.0.rst | 2 +- doc/source/whatsnew/v2.0.0.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 4dfd4aa79916f..212d12ab1d97c 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -332,7 +332,7 @@ Other enhancements - Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`) - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) - The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`) -- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. 
Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) + .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e57ba92267855..366e1f77d7caf 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -37,6 +37,7 @@ Other enhancements - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`) - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`) - :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`) +- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs ``'w'`` and ``'a'``. The default is ``'w'``; ``'a'`` can be used when ``lines=True`` and ``orient='records'`` to append record-oriented JSON lines to an existing JSON file. (:issue:`35849`) .. --------------------------------------------------------------------------- ..
_whatsnew_200.notable_bug_fixes: From a62d13d29d5a30cd76430d53549e578db7c0bfce Mon Sep 17 00:00:00 2001 From: SFuller4 Date: Wed, 19 Oct 2022 19:02:39 -0500 Subject: [PATCH 16/16] removing extra space from old whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 212d12ab1d97c..08dbb357c8053 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -333,7 +333,6 @@ Other enhancements - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) - The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`) - .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: