From a2175c07c1e126e1165597fb4387a5347f4bcfc6 Mon Sep 17 00:00:00 2001 From: Tolker-KU Date: Sun, 12 Jan 2025 12:27:40 +0100 Subject: [PATCH 1/4] Format decimal.Decimal as full precision strings in .to_json(...) --- .../src/vendored/ujson/python/objToJSON.c | 39 ++++++++++++++++++- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 5f35860c59cb7..5e0ddf0d345b4 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -373,6 +373,31 @@ static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { return outValue; } +static char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc, + size_t *len) { + PyObject *obj = (PyObject *)_obj; + PyObject *str = PyObject_Str(obj); + if (str == NULL) { + *len = 0; + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "Failed to convert decimal"); + } + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + return NULL; + } + if (PyUnicode_Check(str)) { + PyObject *tmp = str; + str = PyUnicode_AsUTF8String(str); + Py_DECREF(tmp); + } + + GET_TC(tc)->newObj = str; + + *len = PyBytes_GET_SIZE(str); + char *outValue = PyBytes_AS_STRING(str); + return outValue; +} + //============================================================================= // Numpy array iteration functions //============================================================================= @@ -1467,8 +1492,18 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { tc->type = JT_UTF8; return; } else if (object_is_decimal_type(obj)) { - pc->doubleValue = PyFloat_AsDouble(obj); - tc->type = JT_DOUBLE; + PyObject *is_nan_py = PyObject_RichCompare(obj, obj, Py_NE); + if (is_nan_py == NULL) { + goto INVALID; + } + int is_nan = (is_nan_py == Py_True); + Py_DECREF(is_nan_py); + if (is_nan) { + tc->type = JT_NULL; + return; + } + pc->PyTypeToUTF8 = 
PyDecimalToUTF8Callback; + tc->type = JT_UTF8; return; } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) { if (object_is_nat_type(obj)) { From 1c6781d78eba00be373c36ec14d3eab8094dbd83 Mon Sep 17 00:00:00 2001 From: Tolker-KU Date: Sun, 12 Jan 2025 14:23:56 +0100 Subject: [PATCH 2/4] Fix failing tests --- .../json/test_json_table_schema_ext_dtype.py | 4 +-- pandas/tests/io/json/test_pandas.py | 7 +---- pandas/tests/io/json/test_ujson.py | 30 +++++++++---------- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index 8de289afe9ff9..12ae24b064c9d 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -159,7 +159,7 @@ def test_build_decimal_series(self, dc): expected = OrderedDict( [ ("schema", schema), - ("data", [OrderedDict([("id", 0), ("a", 10.0)])]), + ("data", [OrderedDict([("id", 0), ("a", "10")])]), ] ) @@ -245,7 +245,7 @@ def test_to_json(self, da, dc, sa, ia): [ ("idx", 0), ("A", "2021-10-10T00:00:00.000"), - ("B", 10.0), + ("B", "10"), ("C", "pandas"), ("D", 10), ] diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ad9dbf7554a8b..59997d52179e6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,6 +1,5 @@ import datetime from datetime import timedelta -from decimal import Decimal from io import StringIO import json import os @@ -2025,12 +2024,8 @@ def test_to_s3(self, s3_public_bucket, s3so): timeout -= 0.1 assert timeout > 0, "Timed out waiting for file to appear on moto" - def test_json_pandas_nulls(self, nulls_fixture, request): + def test_json_pandas_nulls(self, nulls_fixture): # GH 31615 - if isinstance(nulls_fixture, Decimal): - mark = pytest.mark.xfail(reason="not implemented") - request.applymarker(mark) - expected_warning = None msg = ( "The default 
'epoch' date format is deprecated and will be removed " diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 62118f1c82ebb..c5ccc3b3f7184 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -57,56 +57,56 @@ def test_encode_decimal(self): sut = decimal.Decimal("1337.1337") encoded = ujson.ujson_dumps(sut, double_precision=15) decoded = ujson.ujson_loads(encoded) - assert decoded == 1337.1337 + assert decoded == "1337.1337" sut = decimal.Decimal("0.95") encoded = ujson.ujson_dumps(sut, double_precision=1) - assert encoded == "1.0" + assert encoded == '"0.95"' decoded = ujson.ujson_loads(encoded) - assert decoded == 1.0 + assert decoded == "0.95" sut = decimal.Decimal("0.94") encoded = ujson.ujson_dumps(sut, double_precision=1) - assert encoded == "0.9" + assert encoded == '"0.94"' decoded = ujson.ujson_loads(encoded) - assert decoded == 0.9 + assert decoded == "0.94" sut = decimal.Decimal("1.95") encoded = ujson.ujson_dumps(sut, double_precision=1) - assert encoded == "2.0" + assert encoded == '"1.95"' decoded = ujson.ujson_loads(encoded) - assert decoded == 2.0 + assert decoded == "1.95" sut = decimal.Decimal("-1.95") encoded = ujson.ujson_dumps(sut, double_precision=1) - assert encoded == "-2.0" + assert encoded == '"-1.95"' decoded = ujson.ujson_loads(encoded) - assert decoded == -2.0 + assert decoded == "-1.95" sut = decimal.Decimal("0.995") encoded = ujson.ujson_dumps(sut, double_precision=2) - assert encoded == "1.0" + assert encoded == '"0.995"' decoded = ujson.ujson_loads(encoded) - assert decoded == 1.0 + assert decoded == "0.995" sut = decimal.Decimal("0.9995") encoded = ujson.ujson_dumps(sut, double_precision=3) - assert encoded == "1.0" + assert encoded == '"0.9995"' decoded = ujson.ujson_loads(encoded) - assert decoded == 1.0 + assert decoded == "0.9995" sut = decimal.Decimal("0.99999999999999944") encoded = ujson.ujson_dumps(sut, double_precision=15) - assert encoded == "1.0" 
+ assert encoded == '"0.99999999999999944"' decoded = ujson.ujson_loads(encoded) - assert decoded == 1.0 + assert decoded == "0.99999999999999944" @pytest.mark.parametrize("ensure_ascii", [True, False]) def test_encode_string_conversion(self, ensure_ascii): From a17820d52e19783561f8d1b390d55f4a36e6ebe7 Mon Sep 17 00:00:00 2001 From: Tolker-KU Date: Mon, 13 Jan 2025 18:58:48 +0100 Subject: [PATCH 3/4] Clean up Decimal to utf8 conversion and switch to using PyObject_Format() to suppress scientific notation --- .../src/vendored/ujson/python/objToJSON.c | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 5e0ddf0d345b4..e7a438ce1aadd 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -376,25 +376,21 @@ static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { static char *PyDecimalToUTF8Callback(JSOBJ _obj, JSONTypeContext *tc, size_t *len) { PyObject *obj = (PyObject *)_obj; - PyObject *str = PyObject_Str(obj); + PyObject *format_spec = PyUnicode_FromStringAndSize("f", 1); + PyObject *str = PyObject_Format(obj, format_spec); + Py_DECREF(format_spec); + if (str == NULL) { - *len = 0; - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, "Failed to convert decimal"); - } ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; return NULL; } - if (PyUnicode_Check(str)) { - PyObject *tmp = str; - str = PyUnicode_AsUTF8String(str); - Py_DECREF(tmp); - } GET_TC(tc)->newObj = str; - *len = PyBytes_GET_SIZE(str); - char *outValue = PyBytes_AS_STRING(str); + Py_ssize_t s_len; + char *outValue = (char *)PyUnicode_AsUTF8AndSize(str, &s_len); + *len = s_len; + return outValue; } From d60f71fe3d20bef8eca0861e742a392dc2c35d4a Mon Sep 17 00:00:00 2001 From: Tolker-KU Date: Mon, 13 Jan 2025 20:27:41 +0100 Subject: [PATCH 4/4] Add whatsnew entry --- 
doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 47838d1e49d61..9449ca5857127 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -51,6 +51,7 @@ Other enhancements - :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`) +- :meth:`DataFrame.to_json` now encodes ``Decimal`` as strings instead of floats (:issue:`60698`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`) - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)