diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9eae2b7a33923..5736d7269cc24 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -520,4 +520,7 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) + +- Bug in ``to_json`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) + - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) diff --git a/pandas/io/tests/json/test_pandas.py b/pandas/io/tests/json/test_pandas.py index ee5039c38b182..440f5c13d5121 100644 --- a/pandas/io/tests/json/test_pandas.py +++ b/pandas/io/tests/json/test_pandas.py @@ -1044,3 +1044,13 @@ def roundtrip(s, encoding='latin-1'): for s in examples: roundtrip(s) + + def test_data_frame_size_after_to_json(self): + # GH15344 + df = DataFrame({'a': [str(1)]}) + + size_before = df.memory_usage(index=True, deep=True).sum() + df.to_json() + size_after = df.memory_usage(index=True, deep=True).sum() + + self.assertEqual(size_before, size_after) diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 42c0b62a57511..e3c75d3b6e081 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -402,6 +402,16 @@ static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { PyObject *obj = (PyObject *)_obj; + +#if (PY_VERSION_HEX >= 0x03030000) + if (PyUnicode_IS_COMPACT_ASCII(obj)) { + Py_ssize_t len; + char *data = PyUnicode_AsUTF8AndSize(obj, &len); + *_outLen = len; + return data; + } +#endif + PyObject *newObj = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj), PyUnicode_GET_SIZE(obj), NULL);