From 9bfd45d35f15c24cc871e3aed54e9cc6f49bdac6 Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Mon, 29 Apr 2019 09:49:33 -0700
Subject: [PATCH 1/4] Removed PyArray_Number special handling

---
 pandas/_libs/src/ujson/python/objToJSON.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index ef6ff61a29630..a70d675ee3f32 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -709,7 +709,7 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
 
     NpyArr_freeItemValue(obj, tc);
 
-    if (PyArray_ISNUMBER(npyarr->array) || PyArray_ISDATETIME(npyarr->array))
+    if (PyArray_ISDATETIME(npyarr->array))
     {
         PRINTMARK();
         GET_TC(tc)->itemValue = obj;

From 94ca5819d953433630286a44b979b74f172a2ac1 Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Mon, 29 Apr 2019 10:40:23 -0700
Subject: [PATCH 2/4] Added ASV for memory benchmark

---
 asv_bench/benchmarks/io/json.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py
index ec2ddc11b7c1d..1844fe4fbb0de 100644
--- a/asv_bench/benchmarks/io/json.py
+++ b/asv_bench/benchmarks/io/json.py
@@ -124,4 +124,16 @@ def time_float_int_str_lines(self, orient):
         self.df_int_float_str.to_json(self.fname, orient='records', lines=True)
 
 
+class ToJSONMem:
+
+    def setup_cache(self):
+        df = DataFrame([[1]])
+
+        return df
+
+    def peakmem_numeric(self, df):
+        for _ in range(100_000):
+            df.to_json()
+
+
 from ..pandas_vb_common import setup  # noqa: F401

From a908bc0678301b15eca71c36ee0f0f6c8187959e Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Mon, 29 Apr 2019 10:48:19 -0700
Subject: [PATCH 3/4] Expanded benchmark to cover floats

---
 asv_bench/benchmarks/io/json.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py
index 1844fe4fbb0de..19d11e6610198 100644
--- a/asv_bench/benchmarks/io/json.py
+++ b/asv_bench/benchmarks/io/json.py
@@ -128,10 +128,20 @@ class ToJSONMem:
 
     def setup_cache(self):
         df = DataFrame([[1]])
+        frames = {
+            'int': df,
+            'float': df.astype(float),
+        }
 
-        return df
+        return frames
 
-    def peakmem_numeric(self, df):
+    def peakmem_int(self, frames):
+        df = frames['int']
+        for _ in range(100_000):
+            df.to_json()
+
+    def peakmem_float(self, frames):
+        df = frames['float']
         for _ in range(100_000):
             df.to_json()
 

From 24e1a23630850d7f5f58b97930b4e89c98cdbdfe Mon Sep 17 00:00:00 2001
From: Will Ayd
Date: Mon, 29 Apr 2019 10:59:20 -0700
Subject: [PATCH 4/4] Added whatsnew

---
 doc/source/whatsnew/v0.25.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index b2a379d9fe6f5..7e0662677b9a8 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -368,6 +368,7 @@ I/O
 - Improved the ``col_space`` parameter in :meth:`DataFrame.to_html` to accept a string so CSS length values can be set correctly (:issue:`25941`)
 - Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
 - Adds ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 of the ``pandas-gbq`` library as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries. (:issue:`26104`)
+- Fixed memory leak in :meth:`DataFrame.to_json` when dealing with numeric data (:issue:`24889`)
 
 Plotting
 ^^^^^^^^