diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index de336fb3aa1dc..4b612bb033761 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -16,18 +16,19 @@ derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. Numeric decoder derived from from TCL library http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms @@ -64,9 +65,9 @@ typedef void *(*PFN_PyTypeToJSON)(JSOBJ obj, JSONTypeContext *ti, typedef struct __NpyArrContext { PyObject *array; char *dataptr; - int curdim; // current dimension in array's order - int stridedim; // dimension we are striding over - int inc; // stride dimension increment (+/- 1) + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) npy_intp dim; npy_intp stride; npy_intp ndim; @@ -83,8 +84,8 @@ typedef struct __PdBlockContext { int ncols; int transpose; - int *cindices; // frame column -> block column map - NpyArrContext **npyCtxts; // NpyArrContext for each column + int *cindices; // frame column -> block column map + NpyArrContext **npyCtxts; // NpyArrContext for each column } PdBlockContext; typedef struct __TypeContext { @@ -148,13 +149,12 @@ enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES }; int PdBlock_iterNext(JSOBJ, JSONTypeContext *); -void *initObjToJSON(void) -{ +void *initObjToJSON(void) { PyObject *mod_pandas; PyObject *mod_nattype; PyObject *mod_decimal = PyImport_ImportModule("decimal"); type_decimal = - (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal"); + (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal"); Py_DECREF(mod_decimal); PyDateTime_IMPORT; @@ -167,14 +167,14 @@ void *initObjToJSON(void) cls_series = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series"); cls_timestamp = PyObject_GetAttrString(mod_pandas, "Timestamp"); - cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta"); + cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta"); Py_DECREF(mod_pandas); } mod_nattype = PyImport_ImportModule("pandas._libs.tslibs.nattype"); if (mod_nattype) { - cls_nat = (PyTypeObject *)PyObject_GetAttrString(mod_nattype, - "NaTType"); + cls_nat = + (PyTypeObject *)PyObject_GetAttrString(mod_nattype, "NaTType"); Py_DECREF(mod_nattype); } @@ -212,7 +212,6 @@ static TypeContext *createTypeContext(void) { return pc; } - static int is_sparse_array(PyObject *obj) { // TODO can be removed again once SparseArray.values is removed (GH26421) if (PyObject_HasAttrString(obj, "_subtyp")) { @@ -227,7 +226,6 @@ static int is_sparse_array(PyObject *obj) { return 0; } - static PyObject *get_values(PyObject *obj) { PyObject *values = NULL; @@ -242,7 +240,8 @@ static PyObject *get_values(PyObject *obj) { values = PyObject_CallMethod(values, "to_numpy", NULL); } - if (!is_sparse_array(values) && PyObject_HasAttrString(values, "values")) { + if (!is_sparse_array(values) && + PyObject_HasAttrString(values, "values")) { PyObject *subvals = get_values(values); PyErr_Clear(); PRINTMARK(); @@ -357,20 +356,20 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { } static npy_int64 get_long_attr(PyObject *o, const char *attr) { - npy_int64 long_val; - PyObject *value = PyObject_GetAttrString(o, attr); - long_val = (PyLong_Check(value) ? - PyLong_AsLongLong(value) : PyLong_AsLong(value)); - Py_DECREF(value); - return long_val; + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = + (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyLong_AsLong(value)); + Py_DECREF(value); + return long_val; } static npy_float64 total_seconds(PyObject *td) { - npy_float64 double_val; - PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); - double_val = PyFloat_AS_DOUBLE(value); - Py_DECREF(value); - return double_val; + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; } static PyObject *get_item(PyObject *obj, Py_ssize_t i) { @@ -450,7 +449,7 @@ static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, if (PyUnicode_IS_COMPACT_ASCII(obj)) { Py_ssize_t len; - char *data = (char*)PyUnicode_AsUTF8AndSize(obj, &len); + char *data = (char *)PyUnicode_AsUTF8AndSize(obj, &len); *_outLen = len; return data; } @@ -505,7 +504,7 @@ static void *NpyDateTimeScalarToJSON(JSOBJ _obj, JSONTypeContext *tc, // TODO(anyone): Does not appear to be reached in tests. pandas_datetime_to_datetimestruct(obj->obval, - (NPY_DATETIMEUNIT)obj->obmeta.base, &dts); + (NPY_DATETIMEUNIT)obj->obmeta.base, &dts); return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen); } @@ -664,9 +663,9 @@ void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { GET_TC(tc)->npyarr = npyarr; if (!npyarr) { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; } npyarr->array = (PyObject *)obj; @@ -677,17 +676,17 @@ void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { npyarr->type_num = PyArray_DESCR(obj)->type_num; if (GET_TC(tc)->transpose) { - npyarr->dim = PyArray_DIM(obj, npyarr->ndim); - npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); - npyarr->stridedim = npyarr->ndim; - npyarr->index[npyarr->ndim] = 0; - npyarr->inc = -1; + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; } else { - npyarr->dim = PyArray_DIM(obj, 0); - npyarr->stride = PyArray_STRIDE(obj, 0); - npyarr->stridedim = 0; - npyarr->index[0] = 0; - npyarr->inc = 1; + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; } npyarr->columnLabels = GET_TC(tc)->columnLabels; @@ -735,8 +734,7 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { NpyArr_freeItemValue(obj, tc); - if (PyArray_ISDATETIME(npyarr->array)) - { + if (PyArray_ISDATETIME(npyarr->array)) { PRINTMARK(); GET_TC(tc)->itemValue = obj; Py_INCREF(obj); @@ -797,10 +795,10 @@ char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { idx = npyarr->index[npyarr->stridedim] - 1; - cStr = npyarr->columnLabels[idx]; + cStr = npyarr->columnLabels[idx]; } else { idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; - cStr = npyarr->rowLabels[idx]; + cStr = npyarr->rowLabels[idx]; } *outLen = strlen(cStr); @@ -852,13 +850,13 @@ char *PdBlock_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { idx = blkCtxt->colIdx - 1; - cStr = npyarr->columnLabels[idx]; + cStr = npyarr->columnLabels[idx]; } else { idx = GET_TC(tc)->iterNext != PdBlock_iterNext ? npyarr->index[npyarr->stridedim - npyarr->inc] - 1 : npyarr->index[npyarr->stridedim]; - cStr = npyarr->rowLabels[idx]; + cStr = npyarr->rowLabels[idx]; } *outLen = strlen(cStr); @@ -875,10 +873,10 @@ char *PdBlock_iterGetName_Transpose(JSOBJ obj, JSONTypeContext *tc, if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { idx = npyarr->index[npyarr->stridedim] - 1; - cStr = npyarr->columnLabels[idx]; + cStr = npyarr->columnLabels[idx]; } else { idx = blkCtxt->colIdx; - cStr = npyarr->rowLabels[idx]; + cStr = npyarr->rowLabels[idx]; } *outLen = strlen(cStr); @@ -943,9 +941,9 @@ void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { dtype = PyArray_DescrFromType(NPY_INT64); obj = (PyObject *)_obj; - GET_TC(tc) - ->iterGetName = GET_TC(tc)->transpose ? PdBlock_iterGetName_Transpose - : PdBlock_iterGetName; + GET_TC(tc)->iterGetName = GET_TC(tc)->transpose + ? PdBlock_iterGetName_Transpose + : PdBlock_iterGetName; blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); if (!blkCtxt) { @@ -1396,7 +1394,7 @@ void Series_iterBegin(JSOBJ obj, JSONTypeContext *tc) { PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; GET_TC(tc)->index = 0; GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series + enc->outputFormat = VALUES; // for contained series if (!GET_TC(tc)->cStr) { PyErr_NoMemory(); } @@ -1455,7 +1453,7 @@ void DataFrame_iterBegin(JSOBJ obj, JSONTypeContext *tc) { PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; GET_TC(tc)->index = 0; GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series & index + enc->outputFormat = VALUES; // for contained series & index if (!GET_TC(tc)->cStr) { PyErr_NoMemory(); } @@ -1634,115 +1632,116 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, type_num = PyArray_TYPE(labels); for (i = 0; i < num; i++) { - item = PyArray_GETITEM(labels, dataptr); + item = PyArray_GETITEM(labels, dataptr); if (!item) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - // TODO: for any matches on type_num (date and timedeltas) should use a - // vectorized solution to convert to epoch or iso formats - if (enc->datetimeIso && (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { - PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); - if (td == NULL) { - Py_DECREF(item); NpyArr_freeLabels(ret, num); ret = 0; break; - } - - PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL); - Py_DECREF(td); - if (iso == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(iso); - Py_DECREF(iso); - len = strlen(cLabel); - } - else if (PyTypeNum_ISDATETIME(type_num) || - PyDateTime_Check(item) || PyDate_Check(item)) { - PyObject *ts = PyObject_CallFunction(cls_timestamp, "(O)", item); - if (ts == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - if (enc->datetimeIso) { - PyObject *iso = PyObject_CallMethod(ts, "isoformat", NULL); - Py_DECREF(ts); - if (iso == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(iso); - Py_DECREF(iso); - len = strlen(cLabel); - } else { - npy_int64 value; - // TODO: refactor to not duplicate what goes on in beginTypeContext - if (PyObject_HasAttrString(ts, "value")) { - PRINTMARK(); - value = get_long_attr(ts, "value"); - } else { - PRINTMARK(); - value = - total_seconds(ts) * 1000000000LL; // nanoseconds per second - } - Py_DECREF(ts); - - switch (enc->datetimeUnit) { - case NPY_FR_ns: - break; - case NPY_FR_us: - value /= 1000LL; - break; - case NPY_FR_ms: - value /= 1000000LL; - break; - case NPY_FR_s: - value /= 1000000000LL; - break; - default: - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - char buf[21] = {0}; // 21 chars for 2**63 as string - cLabel = buf; - sprintf(buf, "%" NPY_INT64_FMT, value); - len = strlen(cLabel); - } - } else { // Fallack to string representation - PyObject *str = PyObject_Str(item); - if (str == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(str); - Py_DECREF(str); - len = strlen(cLabel); - } - - Py_DECREF(item); - // Add 1 to include NULL terminator - ret[i] = PyObject_Malloc(len + 1); - memcpy(ret[i], cLabel, len + 1); + } + + // TODO: for any matches on type_num (date and timedeltas) should use a + // vectorized solution to convert to epoch or iso formats + if (enc->datetimeIso && + (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { + PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); + if (td == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL); + Py_DECREF(td); + if (iso == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(iso); + Py_DECREF(iso); + len = strlen(cLabel); + } else if (PyTypeNum_ISDATETIME(type_num) || PyDateTime_Check(item) || + PyDate_Check(item)) { + PyObject *ts = PyObject_CallFunction(cls_timestamp, "(O)", item); + if (ts == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + if (enc->datetimeIso) { + PyObject *iso = PyObject_CallMethod(ts, "isoformat", NULL); + Py_DECREF(ts); + if (iso == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(iso); + Py_DECREF(iso); + len = strlen(cLabel); + } else { + npy_int64 value; + // TODO: refactor to not duplicate what goes on in + // beginTypeContext + if (PyObject_HasAttrString(ts, "value")) { + PRINTMARK(); + value = get_long_attr(ts, "value"); + } else { + PRINTMARK(); + value = total_seconds(ts) * + 1000000000LL; // nanoseconds per second + } + Py_DECREF(ts); + + switch (enc->datetimeUnit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + value /= 1000LL; + break; + case NPY_FR_ms: + value /= 1000000LL; + break; + case NPY_FR_s: + value /= 1000000000LL; + break; + default: + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + char buf[21] = {0}; // 21 chars for 2**63 as string + cLabel = buf; + sprintf(buf, "%" NPY_INT64_FMT, value); + len = strlen(cLabel); + } + } else { // Fallack to string representation + PyObject *str = PyObject_Str(item); + if (str == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(str); + Py_DECREF(str); + len = strlen(cLabel); + } + + Py_DECREF(item); + // Add 1 to include NULL terminator + ret[i] = PyObject_Malloc(len + 1); + memcpy(ret[i], cLabel, len + 1); if (PyErr_Occurred()) { NpyArr_freeLabels(ret, num); @@ -1923,23 +1922,22 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { value = get_long_attr(obj, "value"); } else { PRINTMARK(); - value = - total_seconds(obj) * 1000000000LL; // nanoseconds per second + value = total_seconds(obj) * 1000000000LL; // nanoseconds per second } base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; switch (base) { - case NPY_FR_ns: - break; - case NPY_FR_us: - value /= 1000LL; - break; - case NPY_FR_ms: - value /= 1000000LL; - break; - case NPY_FR_s: - value /= 1000000000LL; - break; + case NPY_FR_ns: + break; + case NPY_FR_us: + value /= 1000LL; + break; + case NPY_FR_ms: + value /= 1000000LL; + break; + case NPY_FR_s: + value /= 1000000000LL; + break; } exc = PyErr_Occurred(); @@ -2054,8 +2052,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { goto INVALID; } pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - enc, + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, pc->columnLabelsLen); if (!pc->columnLabels) { goto INVALID; @@ -2157,8 +2154,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { goto INVALID; } pc->columnLabelsLen = PyObject_Size(tmpObj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - enc, + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, pc->columnLabelsLen); Py_DECREF(tmpObj); if (!pc->columnLabels) { @@ -2179,9 +2175,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { goto INVALID; } pc->rowLabelsLen = PyObject_Size(tmpObj); - pc->rowLabels = - NpyArr_encodeLabels((PyArrayObject *)values, - enc, pc->rowLabelsLen); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->rowLabelsLen); Py_DECREF(tmpObj); tmpObj = (enc->outputFormat == INDEX ? PyObject_GetAttrString(obj, "columns") @@ -2199,8 +2194,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { goto INVALID; } pc->columnLabelsLen = PyObject_Size(tmpObj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - enc, + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, pc->columnLabelsLen); Py_DECREF(tmpObj); if (!pc->columnLabels) { @@ -2325,7 +2319,8 @@ void Object_endTypeContext(JSOBJ obj, JSONTypeContext *tc) { PyObject_Free(GET_TC(tc)->cStr); GET_TC(tc)->cStr = NULL; - if (tc->prv != &(((PyObjectEncoder *)tc->encoder)->basicTypeContext)) { // NOLINT + if (tc->prv != + &(((PyObjectEncoder *)tc->encoder)->basicTypeContext)) { // NOLINT PyObject_Free(tc->prv); } tc->prv = NULL; @@ -2388,7 +2383,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *newobj; PyObject *oinput = NULL; PyObject *oensureAscii = NULL; - int idoublePrecision = 10; // default double precision setting + int idoublePrecision = 10; // default double precision setting PyObject *oencodeHTMLChars = NULL; char *sOrient = NULL; char *sdateFormat = NULL; @@ -2411,10 +2406,10 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject_Malloc, PyObject_Realloc, PyObject_Free, - -1, // recursionMax + -1, // recursionMax idoublePrecision, - 1, // forceAscii - 0, // encodeHTMLChars + 1, // forceAscii + 0, // encodeHTMLChars }}; JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder;