Skip to content

Continued ujson cleanups #26222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 30, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 35 additions & 52 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,9 @@ Numeric decoder derived from from TCL library
#include <../../../tslibs/src/datetime/np_datetime_strings.h>
#include "datetime.h"

static PyObject *type_decimal;

#define NPY_JSON_BUFSIZE 32768

static PyTypeObject *type_decimal;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was previously a static PyObject, but the CPython guide says you should never declare static PyObjects. Swapped over to PyTypeObject to match semantics of others in the extension:

https://docs.python.org/3/c-api/intro.html#objects-types-and-reference-counts

static PyTypeObject *cls_dataframe;
static PyTypeObject *cls_series;
static PyTypeObject *cls_index;
Expand Down Expand Up @@ -154,8 +153,8 @@ void *initObjToJSON(void)
PyObject *mod_pandas;
PyObject *mod_nattype;
PyObject *mod_decimal = PyImport_ImportModule("decimal");
type_decimal = PyObject_GetAttrString(mod_decimal, "Decimal");
Py_INCREF(type_decimal);
type_decimal =
(PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal");
Py_DECREF(mod_decimal);

PyDateTime_IMPORT;
Expand Down Expand Up @@ -628,44 +627,39 @@ void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
obj = (PyArrayObject *)_obj;
}

if (PyArray_SIZE(obj) < 0) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch of the condition was unreachable

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shoyer is there some special case this might have been intended for?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with the context here, but I agree that this was unreachable.

Maybe it was intended to cover size 0 arrays, may not be representable in JSON? e.g., an shape with shape (0, 1)

PRINTMARK();
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
PRINTMARK();
npyarr = PyObject_Malloc(sizeof(NpyArrContext));
GET_TC(tc)->npyarr = npyarr;

if (!npyarr) {
PyErr_NoMemory();
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
return;
}

npyarr->array = (PyObject *)obj;
npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem;
npyarr->dataptr = PyArray_DATA(obj);
npyarr->ndim = PyArray_NDIM(obj) - 1;
npyarr->curdim = 0;
npyarr->type_num = PyArray_DESCR(obj)->type_num;

if (GET_TC(tc)->transpose) {
npyarr->dim = PyArray_DIM(obj, npyarr->ndim);
npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim);
npyarr->stridedim = npyarr->ndim;
npyarr->index[npyarr->ndim] = 0;
npyarr->inc = -1;
} else {
PRINTMARK();
npyarr = PyObject_Malloc(sizeof(NpyArrContext));
GET_TC(tc)->npyarr = npyarr;

if (!npyarr) {
PyErr_NoMemory();
GET_TC(tc)->iterNext = NpyArr_iterNextNone;
return;
}

npyarr->array = (PyObject *)obj;
npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem;
npyarr->dataptr = PyArray_DATA(obj);
npyarr->ndim = PyArray_NDIM(obj) - 1;
npyarr->curdim = 0;
npyarr->type_num = PyArray_DESCR(obj)->type_num;

if (GET_TC(tc)->transpose) {
npyarr->dim = PyArray_DIM(obj, npyarr->ndim);
npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim);
npyarr->stridedim = npyarr->ndim;
npyarr->index[npyarr->ndim] = 0;
npyarr->inc = -1;
} else {
npyarr->dim = PyArray_DIM(obj, 0);
npyarr->stride = PyArray_STRIDE(obj, 0);
npyarr->stridedim = 0;
npyarr->index[0] = 0;
npyarr->inc = 1;
}

npyarr->columnLabels = GET_TC(tc)->columnLabels;
npyarr->rowLabels = GET_TC(tc)->rowLabels;
npyarr->dim = PyArray_DIM(obj, 0);
npyarr->stride = PyArray_STRIDE(obj, 0);
npyarr->stridedim = 0;
npyarr->index[0] = 0;
npyarr->inc = 1;
}

npyarr->columnLabels = GET_TC(tc)->columnLabels;
npyarr->rowLabels = GET_TC(tc)->rowLabels;
}

void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) {
Expand Down Expand Up @@ -1750,17 +1744,6 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
goto INVALID;
}

return;
} else if (PyLong_Check(obj)) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also unreachable; this used to be PyInt_Check so could yield something in Python2, but with switch to Python3 was duplicative of the PyLong_Check that immediately preceded it

PRINTMARK();

#ifdef _LP64
pc->PyTypeToJSON = PyIntToINT64;
tc->type = JT_LONG;
#else
pc->PyTypeToJSON = PyIntToINT32;
tc->type = JT_INT;
#endif
return;
} else if (PyFloat_Check(obj)) {
PRINTMARK();
Expand All @@ -1782,7 +1765,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
pc->PyTypeToJSON = PyUnicodeToUTF8;
tc->type = JT_UTF8;
return;
} else if (PyObject_IsInstance(obj, type_decimal)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are there any Decimal subclasses to worry about?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PyObject_TypeCheck returns true for subtypes as well, so I don't think that matters:

https://docs.python.org/3.7/c-api/object.html#c.PyObject_TypeCheck

} else if (PyObject_TypeCheck(obj, type_decimal)) {
PRINTMARK();
pc->PyTypeToJSON = PyFloatToDOUBLE;
tc->type = JT_DOUBLE;
Expand Down