From aec58e66241b727fabf4d68ae92770211d4a8cdf Mon Sep 17 00:00:00 2001 From: Funnycrab Date: Sun, 2 Apr 2017 12:49:46 +0800 Subject: [PATCH 1/6] BUG: Fix rollover handling in json encoding This is a fix attempt for issue #15716 as well as #15864. Note that whenever the frac is incremented, there is a chance that its value may hit the value of pow10. --- pandas/_libs/src/ujson/lib/ultrajsonenc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c index 5a15071938c1a..b8063ad605f75 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -823,17 +823,18 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, if (diff > 0.5) { ++frac; - /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */ - if (frac >= pow10) { - frac = 0; - ++whole; - } } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { /* if halfway, round up if odd, OR if last digit is 0. That last part is strange */ ++frac; } + /* handle rollover, e.g. case 0.99 with prec 1 is 1.0 and case 0.95 with prec is 1.0 as well */ + if (frac >= pow10) { + frac = 0; + ++whole; + } + if (enc->doublePrecision == 0) { diff = value - whole; From 6acb969b9d82d64134281a087a501cb755558b7e Mon Sep 17 00:00:00 2001 From: funnycrab Date: Mon, 3 Apr 2017 01:02:28 +0800 Subject: [PATCH 2/6] fix for cpplint --- pandas/_libs/src/ujson/lib/ultrajsonenc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c index b8063ad605f75..6bf2297749006 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -829,11 +829,12 @@ int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, ++frac; } - /* handle rollover, e.g. 
case 0.99 with prec 1 is 1.0 and case 0.95 with prec is 1.0 as well */ - if (frac >= pow10) { - frac = 0; - ++whole; - } + // handle rollover, e.g. + // case 0.99 with prec 1 is 1.0 and case 0.95 with prec 1 is 1.0 as well + if (frac >= pow10) { + frac = 0; + ++whole; + } if (enc->doublePrecision == 0) { diff = value - whole; From 75effb4add82015f8d3fdaea06409cc2af9dded4 Mon Sep 17 00:00:00 2001 From: funnycrab Date: Mon, 3 Apr 2017 01:02:45 +0800 Subject: [PATCH 3/6] add tests --- pandas/tests/io/json/test_ujson.py | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index e66721beed288..796432b01e76b 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -43,6 +43,49 @@ def test_encodeDecimal(self): decoded = ujson.decode(encoded) self.assertEqual(decoded, 1337.1337) + sut = decimal.Decimal("0.95") + encoded = ujson.encode(sut, double_precision=1) + self.assertEqual(encoded, "1.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 1.0) + + sut = decimal.Decimal("0.94") + encoded = ujson.encode(sut, double_precision=1) + self.assertEqual(encoded, "0.9") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 0.9) + + sut = decimal.Decimal("1.95") + encoded = ujson.encode(sut, double_precision=1) + self.assertEqual(encoded, "2.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 2.0) + + + sut = decimal.Decimal("-1.95") + encoded = ujson.encode(sut, double_precision=1) + self.assertEqual(encoded, "-2.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, -2.0) + + sut = decimal.Decimal("0.995") + encoded = ujson.encode(sut, double_precision=2) + self.assertEqual(encoded, "1.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 1.0) + + sut = decimal.Decimal("0.9995") + encoded = ujson.encode(sut, double_precision=3) + self.assertEqual(encoded, "1.0") + decoded = 
ujson.decode(encoded) + self.assertEqual(decoded, 1.0) + + sut = decimal.Decimal("0.99999999999999944") + encoded = ujson.encode(sut, double_precision=15) + self.assertEqual(encoded, "1.0") + decoded = ujson.decode(encoded) + self.assertEqual(decoded, 1.0) + def test_encodeStringConversion(self): input = "A string \\ / \b \f \n \r \t &" not_html_encoded = ('"A string \\\\ \\/ \\b \\f \\n ' From 9b0dff00849d6410885db4443de0c98e2ff8ea43 Mon Sep 17 00:00:00 2001 From: funnycrab Date: Mon, 3 Apr 2017 01:36:49 +0800 Subject: [PATCH 4/6] remove additional blank line --- pandas/tests/io/json/test_ujson.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 796432b01e76b..c2cbbe1ca65ab 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -61,7 +61,6 @@ def test_encodeDecimal(self): decoded = ujson.decode(encoded) self.assertEqual(decoded, 2.0) - sut = decimal.Decimal("-1.95") encoded = ujson.encode(sut, double_precision=1) self.assertEqual(encoded, "-2.0") From 3cee6b32150cfbeb36d69c742e9bb394341fa545 Mon Sep 17 00:00:00 2001 From: funnycrab Date: Mon, 3 Apr 2017 07:41:58 +0800 Subject: [PATCH 5/6] add whatsnew entry --- doc/source/whatsnew/v0.20.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a34b9feb2b2fa..be9b52e1051f1 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -988,6 +988,7 @@ I/O - Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) +- Bug in ``DataFrame.to_json()`` for the C engine where rollover was not correctly handled for the case where frac is 
odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) Plotting ^^^^^^^^ From c9710ee717cc89f07c80da5c376b9e89b0e5a799 Mon Sep 17 00:00:00 2001 From: funnycrab Date: Mon, 3 Apr 2017 07:43:24 +0800 Subject: [PATCH 6/6] add more tests for examples listed in issue #15716 and #15864 --- pandas/tests/io/json/test_pandas.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7dbcf25c60b45..8fc8ecbdf8abc 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -380,6 +380,31 @@ def test_frame_from_json_nones(self): unser = read_json(df.to_json(), dtype=False) self.assertTrue(np.isnan(unser[2][0])) + def test_frame_to_json_float_precision(self): + df = pd.DataFrame([dict(a_float=0.95)]) + encoded = df.to_json(double_precision=1) + self.assertEqual(encoded, '{"a_float":{"0":1.0}}') + + df = pd.DataFrame([dict(a_float=1.95)]) + encoded = df.to_json(double_precision=1) + self.assertEqual(encoded, '{"a_float":{"0":2.0}}') + + df = pd.DataFrame([dict(a_float=-1.95)]) + encoded = df.to_json(double_precision=1) + self.assertEqual(encoded, '{"a_float":{"0":-2.0}}') + + df = pd.DataFrame([dict(a_float=0.995)]) + encoded = df.to_json(double_precision=2) + self.assertEqual(encoded, '{"a_float":{"0":1.0}}') + + df = pd.DataFrame([dict(a_float=0.9995)]) + encoded = df.to_json(double_precision=3) + self.assertEqual(encoded, '{"a_float":{"0":1.0}}') + + df = pd.DataFrame([dict(a_float=0.99999999999999944)]) + encoded = df.to_json(double_precision=15) + self.assertEqual(encoded, '{"a_float":{"0":1.0}}') + def test_frame_to_json_except(self): df = DataFrame([1, 2, 3]) self.assertRaises(ValueError, df.to_json, orient="garbage")