Skip to content

ujson Py2 compat removal #26199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions pandas/_libs/src/ujson/python/JSONtoObj.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,12 @@ Numeric decoder derived from from TCL library
* Copyright (c) 1994 Sun Microsystems, Inc.
*/

// "py_defines.h" needs to be included first to
// avoid compilation errors, but it does violate
// styleguide checks with regards to include order.
#include "py_defines.h"
#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h> // NOLINT(build/include_order)
#include <ultrajson.h> // NOLINT(build/include_order)
#define PY_SSIZE_T_CLEAN
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This definition is suggested by the Python developers:

https://docs.python.org/3/c-api/intro.html#include-files

I noticed we haven't used anywhere else so could probably be added as a follow up in other places

#include <Python.h>
#include <numpy/arrayobject.h>
#include <ultrajson.h>

#define PRINTMARK()

Expand Down Expand Up @@ -470,7 +468,7 @@ JSOBJ Object_newArray(void *prv, void *decoder) { return PyList_New(0); }
JSOBJ Object_endArray(void *prv, JSOBJ obj) { return obj; }

JSOBJ Object_newInteger(void *prv, JSINT32 value) {
return PyInt_FromLong((long)value);
return PyLong_FromLong((long)value);
}

JSOBJ Object_newLong(void *prv, JSINT64 value) {
Expand Down Expand Up @@ -530,7 +528,7 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
decoder->preciseFloat = 1;
}

if (PyString_Check(arg)) {
if (PyBytes_Check(arg)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so if this is correct then the error message is wrong? should be bytes or unicode? is there something that hits this in testing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the error message and added a test to test_ujson. FWIW I don't think anything user-facing hits this as doing:

pd.read_json(None)

Would yield:

>>> pd.read_json(None)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/williamayd/clones/pandas/pandas/io/json/json.py", line 437, in read_json
    path_or_buf, encoding=encoding, compression=compression,
  File "/Users/williamayd/clones/pandas/pandas/io/common.py", line 207, in get_filepath_or_buffer
    raise ValueError(msg.format(_type=type(filepath_or_buffer)))
ValueError: Invalid file path or buffer object type: <class 'NoneType'>

You could hit it directly internally if you did something like:

import pandas._libs.json as ujson
ujson.loads(None)

which now yields:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: Expected 'str' or 'bytes'

sarg = arg;
} else if (PyUnicode_Check(arg)) {
sarg = PyUnicode_AsUTF8String(arg);
Expand All @@ -539,7 +537,7 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
return NULL;
}
} else {
PyErr_Format(PyExc_TypeError, "Expected String or Unicode");
PyErr_Format(PyExc_TypeError, "Expected 'str' or 'bytes'");
return NULL;
}

Expand All @@ -559,8 +557,8 @@ PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) {
}
}

ret = JSON_DecodeObject(decoder, PyString_AS_STRING(sarg),
PyString_GET_SIZE(sarg));
ret = JSON_DecodeObject(decoder, PyBytes_AS_STRING(sarg),
PyBytes_GET_SIZE(sarg));

if (sarg != arg) {
Py_DECREF(sarg);
Expand Down
74 changes: 31 additions & 43 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,16 @@ Numeric decoder derived from from TCL library
*/
#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY

// "py_defines.h" needs to be included first to
// avoid compilation errors, but it does violate
// styleguide checks with regards to include order.
#include "py_defines.h" // NOLINT(build/include_order)
#include <math.h> // NOLINT(build/include_order)
#include <numpy/arrayobject.h> // NOLINT(build/include_order)
#include <numpy/arrayscalars.h> // NOLINT(build/include_order)
#include <numpy/ndarraytypes.h> // NOLINT(build/include_order)
#include <numpy/npy_math.h> // NOLINT(build/include_order)
#include <stdio.h> // NOLINT(build/include_order)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#include <Python.h> implies this so no need to state again

#include <ultrajson.h> // NOLINT(build/include_order)
#include <../../../tslibs/src/datetime/np_datetime.h> // NOLINT(build/include_order)
#include <../../../tslibs/src/datetime/np_datetime_strings.h> // NOLINT(build/include_order)
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <math.h>
#include <numpy/arrayobject.h>
#include <numpy/arrayscalars.h>
#include <numpy/ndarraytypes.h>
#include <numpy/npy_math.h>
#include <ultrajson.h>
#include <../../../tslibs/src/datetime/np_datetime.h>
#include <../../../tslibs/src/datetime/np_datetime_strings.h>
#include "datetime.h"

static PyObject *type_decimal;
Expand Down Expand Up @@ -281,7 +278,7 @@ static PyObject *get_values(PyObject *obj) {
repr = PyObject_Repr(dtype);
Py_DECREF(dtype);
} else {
repr = PyString_FromString("<unknown dtype>");
repr = PyUnicode_FromString("<unknown dtype>");
}

PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet",
Expand Down Expand Up @@ -341,7 +338,7 @@ static npy_int64 get_long_attr(PyObject *o, const char *attr) {
npy_int64 long_val;
PyObject *value = PyObject_GetAttrString(o, attr);
long_val = (PyLong_Check(value) ?
PyLong_AsLongLong(value) : PyInt_AS_LONG(value));
PyLong_AsLongLong(value) : PyLong_AsLong(value));
Py_DECREF(value);
return long_val;
}
Expand All @@ -355,7 +352,7 @@ static npy_float64 total_seconds(PyObject *td) {
}

static PyObject *get_item(PyObject *obj, Py_ssize_t i) {
PyObject *tmp = PyInt_FromSsize_t(i);
PyObject *tmp = PyLong_FromSsize_t(i);
PyObject *ret;

if (tmp == 0) {
Expand Down Expand Up @@ -385,14 +382,14 @@ static void *CLong(JSOBJ obj, JSONTypeContext *tc, void *outValue,
static void *PyIntToINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
size_t *_outLen) {
PyObject *obj = (PyObject *)_obj;
*((JSINT64 *)outValue) = PyInt_AS_LONG(obj);
*((JSINT64 *)outValue) = PyLong_AsLong(obj);
return NULL;
}
#else
static void *PyIntToINT32(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
size_t *_outLen) {
PyObject *obj = (PyObject *)_obj;
*((JSINT32 *)outValue) = PyInt_AS_LONG(obj);
*((JSINT32 *)outValue) = PyLong_AsLong(obj);
return NULL;
}
#endif
Expand Down Expand Up @@ -420,8 +417,8 @@ static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
size_t *_outLen) {
PyObject *obj = (PyObject *)_obj;
*_outLen = PyString_GET_SIZE(obj);
return PyString_AS_STRING(obj);
*_outLen = PyBytes_GET_SIZE(obj);
return PyBytes_AS_STRING(obj);
}

static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
Expand All @@ -442,8 +439,8 @@ static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,

GET_TC(tc)->newObj = newObj;

*_outLen = PyString_GET_SIZE(newObj);
return PyString_AS_STRING(newObj);
*_outLen = PyBytes_GET_SIZE(newObj);
return PyBytes_AS_STRING(newObj);
}

static void *PandasDateTimeStructToJSON(npy_datetimestruct *dts,
Expand Down Expand Up @@ -546,8 +543,8 @@ static void *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, void *outValue,

GET_TC(tc)->newObj = str;

*outLen = PyString_GET_SIZE(str);
outValue = (void *)PyString_AS_STRING(str);
*outLen = PyBytes_GET_SIZE(str);
outValue = (void *)PyBytes_AS_STRING(str);
return outValue;
}

Expand Down Expand Up @@ -1248,13 +1245,8 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) {

for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) {
attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index);
#if PY_MAJOR_VERSION >= 3
attr = PyUnicode_AsUTF8String(attrName);
#else
attr = attrName;
Py_INCREF(attr);
#endif
attrStr = PyString_AS_STRING(attr);
attrStr = PyBytes_AS_STRING(attr);

if (attrStr[0] == '_') {
PRINTMARK();
Expand Down Expand Up @@ -1307,8 +1299,8 @@ JSOBJ Dir_iterGetValue(JSOBJ obj, JSONTypeContext *tc) {

char *Dir_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) {
PRINTMARK();
*outLen = PyString_GET_SIZE(GET_TC(tc)->itemName);
return PyString_AS_STRING(GET_TC(tc)->itemName);
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
}

//=============================================================================
Expand Down Expand Up @@ -1525,9 +1517,7 @@ void Dict_iterBegin(JSOBJ obj, JSONTypeContext *tc) {
}

int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) {
#if PY_MAJOR_VERSION >= 3
PyObject *itemNameTmp;
#endif

if (GET_TC(tc)->itemName) {
Py_DECREF(GET_TC(tc)->itemName);
Expand All @@ -1542,13 +1532,11 @@ int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) {

if (PyUnicode_Check(GET_TC(tc)->itemName)) {
GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName);
} else if (!PyString_Check(GET_TC(tc)->itemName)) {
} else if (!PyBytes_Check(GET_TC(tc)->itemName)) {
GET_TC(tc)->itemName = PyObject_Str(GET_TC(tc)->itemName);
#if PY_MAJOR_VERSION >= 3
itemNameTmp = GET_TC(tc)->itemName;
GET_TC(tc)->itemName = PyUnicode_AsUTF8String(GET_TC(tc)->itemName);
Py_DECREF(itemNameTmp);
#endif
} else {
Py_INCREF(GET_TC(tc)->itemName);
}
Expand All @@ -1570,8 +1558,8 @@ JSOBJ Dict_iterGetValue(JSOBJ obj, JSONTypeContext *tc) {
}

char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) {
*outLen = PyString_GET_SIZE(GET_TC(tc)->itemName);
return PyString_AS_STRING(GET_TC(tc)->itemName);
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
}

void NpyArr_freeLabels(char **labels, npy_intp len) {
Expand Down Expand Up @@ -1788,7 +1776,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
}

return;
} else if (PyInt_Check(obj)) {
} else if (PyLong_Check(obj)) {
PRINTMARK();

#ifdef _LP64
Expand All @@ -1809,7 +1797,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
tc->type = JT_DOUBLE;
}
return;
} else if (PyString_Check(obj)) {
} else if (PyBytes_Check(obj)) {
PRINTMARK();
pc->PyTypeToJSON = PyStringToUTF8;
tc->type = JT_UTF8;
Expand Down Expand Up @@ -1932,7 +1920,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
tmpObj = PyObject_Repr(obj);
PyErr_Format(PyExc_TypeError,
"%s (0d array) is not JSON serializable at the moment",
PyString_AS_STRING(tmpObj));
PyBytes_AS_STRING(tmpObj));
Py_DECREF(tmpObj);
goto INVALID;
}
Expand Down Expand Up @@ -2469,7 +2457,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
return NULL;
}

newobj = PyString_FromString(ret);
newobj = PyUnicode_FromString(ret);

if (ret != buffer) {
encoder->free(ret);
Expand Down
58 changes: 0 additions & 58 deletions pandas/_libs/src/ujson/python/py_defines.h

This file was deleted.

17 changes: 3 additions & 14 deletions pandas/_libs/src/ujson/python/ujson.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ Numeric decoder derived from from TCL library
* Copyright (c) 1994 Sun Microsystems, Inc.
*/

#include "py_defines.h"
#include "version.h"
#define PY_SSIZE_T_CLEAN
#include <Python.h>

/* objToJSON */
PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs);
Expand Down Expand Up @@ -76,8 +77,6 @@ static PyMethodDef ujsonMethods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */
};

#if PY_MAJOR_VERSION >= 3

static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"_libjson",
Expand All @@ -94,14 +93,6 @@ static struct PyModuleDef moduledef = {
#define PYMODULE_CREATE() PyModule_Create(&moduledef)
#define MODINITERROR return NULL

#else

#define PYMODINITFUNC PyMODINIT_FUNC initjson(void)
#define PYMODULE_CREATE() Py_InitModule("json", ujsonMethods)
#define MODINITERROR return

#endif

PYMODINITFUNC {
PyObject *module;
PyObject *version_string;
Expand All @@ -113,10 +104,8 @@ PYMODINITFUNC {
MODINITERROR;
}

version_string = PyString_FromString(UJSON_VERSION);
version_string = PyUnicode_FromString(UJSON_VERSION);
PyModule_AddObject(module, "__version__", version_string);

#if PY_MAJOR_VERSION >= 3
return module;
#endif
}
5 changes: 5 additions & 0 deletions pandas/tests/io/json/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,11 @@ def test_load_file_args_error(self):
with pytest.raises(TypeError):
ujson.load("[]")

def test_loads_non_str_bytes_raises(self):
msg = "Expected 'str' or 'bytes'"
with pytest.raises(TypeError, match=msg):
ujson.loads(None)

def test_version(self):
assert re.match(r'^\d+\.\d+(\.\d+)?$', ujson.__version__), \
"ujson.__version__ must be a string like '1.4.0'"
Expand Down