From 70b73bd0f3080992e4ed3907e6d224634ae77566 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 26 Nov 2022 23:39:00 +0000 Subject: [PATCH 1/3] API: ensure read_json closes file handle --- pandas/io/json/_json.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 3020731b77a3c..92d5b584df504 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -896,20 +896,20 @@ def read(self) -> DataFrame | Series: Read the whole JSON input into a pandas object. """ obj: DataFrame | Series - if self.lines: - if self.chunksize: - obj = concat(self) - elif self.nrows: - lines = list(islice(self.data, self.nrows)) - lines_json = self._combine_lines(lines) - obj = self._get_object_parser(lines_json) + with self: + if self.lines: + if self.chunksize: + obj = concat(self) + elif self.nrows: + lines = list(islice(self.data, self.nrows)) + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + else: + data = ensure_str(self.data) + data_lines = data.split("\n") + obj = self._get_object_parser(self._combine_lines(data_lines)) else: - data = ensure_str(self.data) - data_lines = data.split("\n") - obj = self._get_object_parser(self._combine_lines(data_lines)) - else: - obj = self._get_object_parser(self.data) - self.close() + obj = self._get_object_parser(self.data) return obj def _get_object_parser(self, json) -> DataFrame | Series: From e93b6da524f8977365f1b01683702e7009c82014 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 30 Nov 2022 21:55:19 +0000 Subject: [PATCH 2/3] updates --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/io/json/_json.py | 24 +++++++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index f1ee96ddc3c16..4b434c1eec9f7 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -349,6 +349,7 @@ Other API changes - :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`) - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) +- Improve handling of file closures in :func:`read_json` (:issue:`49921`) - :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) - diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 92d5b584df504..5f02822b68d6d 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -750,8 +750,7 @@ def read_json( if chunksize: return json_reader - - with json_reader: + else: return json_reader.read() @@ -964,24 +963,27 @@ def __next__(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series ... def __next__(self) -> DataFrame | Series: - if self.nrows: - if self.nrows_seen >= self.nrows: - self.close() - raise StopIteration + if self.nrows and self.nrows_seen >= self.nrows: + self.close() + raise StopIteration lines = list(islice(self.data, self.chunksize)) - if lines: + if not lines: + self.close() + raise StopIteration + + try: lines_json = self._combine_lines(lines) obj = self._get_object_parser(lines_json) # Make sure that the returned objects have the right index. obj.index = range(self.nrows_seen, self.nrows_seen + len(obj)) self.nrows_seen += len(obj) + except Exception as ex: + self.close() + raise ex - return obj - - self.close() - raise StopIteration + return obj def __enter__(self) -> JsonReader[FrameSeriesStrT]: return self From d2d511cdac37e36b1ce15288bb60b52ba8fda7d5 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 1 Dec 2022 21:04:12 +0000 Subject: [PATCH 3/3] Update doc/source/whatsnew/v2.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4b434c1eec9f7..dd46cd533bb80 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -349,7 +349,7 @@ Other API changes - :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`) - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) -- Improve handling of file closures in :func:`read_json` (:issue:`49921`) +- Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`) - :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) -