From b700991b913c0699c2f8fccf07ebbb40f71bcd60 Mon Sep 17 00:00:00 2001 From: Joel Gibson Date: Sun, 8 Aug 2021 11:34:06 +1000 Subject: [PATCH 1/6] BUG: Attributes skipped when serialising plain Python objects to JSON (#42768) --- doc/source/whatsnew/v1.3.2.rst | 1 + pandas/_libs/src/ujson/python/objToJSON.c | 42 ++++++++--------------- pandas/tests/io/json/test_ujson.py | 15 ++++++++ 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index e68f5d9d6bc56..701f31c0498b1 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -36,6 +36,7 @@ Bug fixes - :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`) - :meth:`.Styler.set_sticky` has amended CSS to control the column/index names and ensure the correct sticky positions (:issue:`42537`) - Bug in de-serializing datetime indexes in PYTHONOPTIMIZED mode (:issue:`42866`) +- Some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index cf530c8c07440..c69bc041c0450 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -882,37 +882,33 @@ void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { PyObject *obj = (PyObject *)_obj; - PyObject *itemValue = GET_TC(tc)->itemValue; - PyObject *itemName = GET_TC(tc)->itemName; - PyObject *attr; - PyObject *attrName; - char *attrStr; if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { return 0; } - if (itemValue) { + if (GET_TC(tc)->itemValue) { Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = itemValue = NULL; + GET_TC(tc)->itemValue = NULL; } - if (itemName) { + if (GET_TC(tc)->itemName) { Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = itemName = NULL; + GET_TC(tc)->itemName = NULL; } for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { - attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); - attr = PyUnicode_AsUTF8String(attrName); - attrStr = PyBytes_AS_STRING(attr); + PyObject *attrName = + PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + PyObject *attr = PyUnicode_AsUTF8String(attrName); + char *attrStr = PyBytes_AS_STRING(attr); if (attrStr[0] == '_') { Py_DECREF(attr); continue; } - itemValue = PyObject_GetAttr(obj, attrName); + PyObject *itemValue = PyObject_GetAttr(obj, attrName); if (itemValue == NULL) { PyErr_Clear(); Py_DECREF(attr); @@ -925,25 +921,15 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { continue; } - GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemName = attr; GET_TC(tc)->itemValue = itemValue; GET_TC(tc)->index++; - - itemName = attr; - break; - } - - if (itemName == NULL) { - GET_TC(tc)->index = GET_TC(tc)->size; - GET_TC(tc)->itemValue = NULL; - return 0; + return 1; } - GET_TC(tc)->itemName = itemName; - GET_TC(tc)->itemValue = itemValue; - GET_TC(tc)->index++; - - return 1; + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; } JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 57a6b214cec84..3051df298dddd 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -720,6 +720,21 @@ def my_obj_handler(_): ujson.encode(obj_list, default_handler=str) ) + def test_encode_object(self): + # Keys should be all non-callable non-underscore attributes, see GH-42768 + class _TestObject: + def __init__(self, a, b, _c, d): + self.a = a + self.b = b + self._c = _c + self.d = d + + def e(self): + return 5 + + test_object = _TestObject(a=1, b=2, _c=3, d=4) + assert ujson.decode(ujson.encode(test_object)) == {"a": 1, "b": 2, "d": 4} + class TestNumpyJSONTests: @pytest.mark.parametrize("bool_input", [True, False]) From f0aaa5b4ad25cc916a7f07bd2667ff1158d61381 Mon Sep 17 00:00:00 2001 From: Joel Gibson Date: Sun, 8 Aug 2021 12:28:20 +1000 Subject: [PATCH 2/6] Moved change to 1.4 --- doc/source/whatsnew/v1.3.2.rst | 1 - doc/source/whatsnew/v1.4.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 701f31c0498b1..e68f5d9d6bc56 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -36,7 +36,6 @@ Bug fixes - :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`) - :meth:`.Styler.set_sticky` has amended CSS to control the column/index names and ensure the correct sticky positions (:issue:`42537`) - Bug in de-serializing datetime indexes in PYTHONOPTIMIZED mode (:issue:`42866`) -- Some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`) - .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fa9c424351b00..de7365839e509 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -258,6 +258,7 @@ I/O - Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`) - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`) - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`) +- Some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`) - Period From 4c566f410ef6d5eac1688409ff3d250ed99af899 Mon Sep 17 00:00:00 2001 From: Joel Gibson Date: Mon, 9 Aug 2021 09:20:11 +1000 Subject: [PATCH 3/6] Condensed change, added test, updated doc. --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/_libs/src/ujson/python/objToJSON.c | 43 +++++++++++++++-------- pandas/tests/io/json/test_pandas.py | 15 ++++++++ pandas/tests/io/json/test_ujson.py | 2 +- 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index de7365839e509..445d33a13ee6c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -258,7 +258,7 @@ I/O - Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`) - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`) - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`) -- Some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`) +- Bug in :func:`to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`) - Period diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index c69bc041c0450..06c3d823f72d2 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -882,33 +882,37 @@ void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { PyObject *obj = (PyObject *)_obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = GET_TC(tc)->itemName; + PyObject *attr; + PyObject *attrName; + char *attrStr; if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { return 0; } - if (GET_TC(tc)->itemValue) { + if (itemValue) { Py_DECREF(GET_TC(tc)->itemValue); - GET_TC(tc)->itemValue = NULL; + GET_TC(tc)->itemValue = itemValue = NULL; } - if (GET_TC(tc)->itemName) { + if (itemName) { Py_DECREF(GET_TC(tc)->itemName); - GET_TC(tc)->itemName = NULL; + GET_TC(tc)->itemName = itemName = NULL; } for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { - PyObject *attrName = - PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); - PyObject *attr = PyUnicode_AsUTF8String(attrName); - char *attrStr = PyBytes_AS_STRING(attr); + attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + attr = PyUnicode_AsUTF8String(attrName); + attrStr = PyBytes_AS_STRING(attr); if (attrStr[0] == '_') { Py_DECREF(attr); continue; } - PyObject *itemValue = PyObject_GetAttr(obj, attrName); + itemValue = PyObject_GetAttr(obj, attrName); if (itemValue == NULL) { PyErr_Clear(); Py_DECREF(attr); @@ -921,15 +925,24 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { continue; } - GET_TC(tc)->itemName = attr; + GET_TC(tc)->itemName = itemName; GET_TC(tc)->itemValue = itemValue; - GET_TC(tc)->index++; - return 1; + + itemName = attr; + break; } - GET_TC(tc)->index = GET_TC(tc)->size; - GET_TC(tc)->itemValue = NULL; - return 0; + if (itemName == NULL) { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index++; + + return 1; } JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d97ba8694818b..55b4eb4beb594 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1769,3 +1769,18 @@ def test_to_json_multiindex_escape(self): "\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}" ) assert result == expected + + def to_json_series_of_objects(self): + class _TestObject: + def __init__(self, a, b, _c, d): + self.a = a + self.b = b + self._c = _c + self.d = d + + def e(self): + return 5 + + # JSON keys should be all non-callable non-underscore attributes, see GH-42768 + series = pd.Series([_TestObject(a=1, b=2, _c=3, d=4)]) + assert json.loads(series.to_json()) == {"0": {"a": 1, "b": 2, "d": 4}} diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 3051df298dddd..58ccd31b7c940 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -721,7 +721,6 @@ def my_obj_handler(_): ) def test_encode_object(self): - # Keys should be all non-callable non-underscore attributes, see GH-42768 class _TestObject: def __init__(self, a, b, _c, d): self.a = a @@ -732,6 +731,7 @@ def __init__(self, a, b, _c, d): def e(self): return 5 + # JSON keys should be all non-callable non-underscore attributes, see GH-42768 test_object = _TestObject(a=1, b=2, _c=3, d=4) assert ujson.decode(ujson.encode(test_object)) == {"a": 1, "b": 2, "d": 4} From 2c3744c82a98c32e4a41dd1e000d9fdc765a3042 Mon Sep 17 00:00:00 2001 From: Joel Gibson Date: Mon, 9 Aug 2021 09:25:37 +1000 Subject: [PATCH 4/6] Updated doc. --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 445d33a13ee6c..4108524e1239d 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -258,7 +258,7 @@ I/O - Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`) - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`) - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`) -- Bug in :func:`to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`) +- Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`) - Period From a06b4ab17a5fc182b98937221b94dffac2f740d7 Mon Sep 17 00:00:00 2001 From: Joel Gibson Date: Mon, 9 Aug 2021 09:42:22 +1000 Subject: [PATCH 5/6] Fix reference to Series. --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 55b4eb4beb594..cf2bd392363f1 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1782,5 +1782,5 @@ def e(self): return 5 # JSON keys should be all non-callable non-underscore attributes, see GH-42768 - series = pd.Series([_TestObject(a=1, b=2, _c=3, d=4)]) + series = Series([_TestObject(a=1, b=2, _c=3, d=4)]) assert json.loads(series.to_json()) == {"0": {"a": 1, "b": 2, "d": 4}} From caa4f5eff0eab8a01c80d3d99aa8d0098549ad15 Mon Sep 17 00:00:00 2001 From: Joel Gibson Date: Tue, 10 Aug 2021 09:03:14 +1000 Subject: [PATCH 6/6] Update pandas/tests/io/json/test_pandas.py Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> --- pandas/tests/io/json/test_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index cf2bd392363f1..77a3394c08ef2 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1770,7 +1770,7 @@ def test_to_json_multiindex_escape(self): ) assert result == expected - def to_json_series_of_objects(self): + def test_to_json_series_of_objects(self): class _TestObject: def __init__(self, a, b, _c, d): self.a = a