From ce6338ec146df1bf8671f6939558aaa5abd7d007 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Jan 2020 09:52:27 -0800 Subject: [PATCH 1/6] added test case --- pandas/tests/io/json/test_pandas.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index e909a4952948c..cde7e04757002 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,4 +1,5 @@ from collections import OrderedDict +import datetime from datetime import timedelta from io import StringIO import json @@ -810,6 +811,29 @@ def test_convert_dates(self): result = read_json(json, typ="series") tm.assert_series_equal(result, ts) + @pytest.mark.parametrize("date_format", ["epoch", "iso"]) + @pytest.mark.parametrize("as_object", [True, False]) + @pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, pd.Timestamp]) + def test_date_index_and_values(self, date_format, as_object, date_typ): + data = [date_typ(year=2020, month=1, day=1)] + if as_object: + data.append("a") + + ser = pd.Series(data, index=data) + result = ser.to_json(date_format=date_format) + + if date_format == "epoch": + expected = '{"1577836800000":1577836800000}' + else: + expected = ( + '{"2020-01-01T00:00:00.000Z":"2020-01-01T00:00:00.000Z"}' + ) + + if as_object: + expected = expected.replace("}", ',"a":"a"}') + + assert result == expected + @pytest.mark.parametrize( "infer_word", [ From efe467ba5e655dbee3b3dcf9f16314c3799582d0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Jan 2020 09:53:49 -0800 Subject: [PATCH 2/6] fixed lack of date support --- pandas/_libs/src/ujson/python/objToJSON.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index c413a16f8d5f0..3bbd6e2ebac59 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -456,7 +456,7 @@ static char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, size_t *len) { - if (!PyDateTime_Check(obj)) { + if (!PyDate_Check(obj)) { PyErr_SetString(PyExc_TypeError, "Expected datetime object"); return NULL; } From 8ecae44fc94ae562217084f41700f06d73dedcfb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Jan 2020 10:08:03 -0800 Subject: [PATCH 3/6] Fixed null handling --- pandas/_libs/src/ujson/python/objToJSON.c | 17 +++++++++++++---- pandas/tests/io/json/test_pandas.py | 6 +++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 3bbd6e2ebac59..b81f12e392108 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -1541,9 +1541,12 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, break; } - // TODO: vectorized timedelta solution - if (enc->datetimeIso && - (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { + if (PyObject_TypeCheck(item, cls_nat)) { + len = 5; // TODO: shouldn't require extra space for terminator + cLabel = PyObject_Malloc(len); + strncpy(cLabel, "null", len); + } else if (enc->datetimeIso && + (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); if (td == NULL) { Py_DECREF(item); @@ -1575,7 +1578,13 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, enc->npyType); } castfunc(dataptr, &longVal, 1, NULL, NULL); - if (enc->datetimeIso) { + + if (longVal == get_nat()) { + PRINTMARK(); + len = 5; // TODO: shouldn't require extra space for terminator + cLabel = PyObject_Malloc(len); + strncpy(cLabel, "null", len); + } else if (enc->datetimeIso) { cLabel = int64ToIso(longVal, base, &len); } else { if (!scaleNanosecToUnit(&longVal, base)) { diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index cde7e04757002..0f66f5464f9cc 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -815,7 +815,7 @@ def test_convert_dates(self): @pytest.mark.parametrize("as_object", [True, False]) @pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, pd.Timestamp]) def test_date_index_and_values(self, date_format, as_object, date_typ): - data = [date_typ(year=2020, month=1, day=1)] + data = [date_typ(year=2020, month=1, day=1), pd.NaT] if as_object: data.append("a") @@ -823,10 +823,10 @@ def test_date_index_and_values(self, date_format, as_object, date_typ): result = ser.to_json(date_format=date_format) if date_format == "epoch": - expected = '{"1577836800000":1577836800000}' + expected = '{"1577836800000":1577836800000,"null":null}' else: expected = ( - '{"2020-01-01T00:00:00.000Z":"2020-01-01T00:00:00.000Z"}' + '{"2020-01-01T00:00:00.000Z":"2020-01-01T00:00:00.000Z","null":null}' ) if as_object: From 73bf88342e0a095a9746fd13863853d937288449 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Jan 2020 10:14:35 -0800 Subject: [PATCH 4/6] Black --- pandas/tests/io/json/test_pandas.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 0f66f5464f9cc..bb873c71e8a35 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -813,7 +813,9 @@ def test_convert_dates(self): @pytest.mark.parametrize("date_format", ["epoch", "iso"]) @pytest.mark.parametrize("as_object", [True, False]) - @pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, pd.Timestamp]) + @pytest.mark.parametrize( + "date_typ", [datetime.date, datetime.datetime, pd.Timestamp] + ) def test_date_index_and_values(self, date_format, as_object, date_typ): data = [date_typ(year=2020, month=1, day=1), pd.NaT] if as_object: @@ -832,7 +834,7 @@ def test_date_index_and_values(self, date_format, as_object, date_typ): if as_object: expected = expected.replace("}", ',"a":"a"}') - assert result == expected + assert result == expected @pytest.mark.parametrize( "infer_word", From 254449580184e5371cfbd6283ba3498264470a90 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 13 Jan 2020 10:26:23 -0800 Subject: [PATCH 5/6] jbrockmendel feedback --- pandas/_libs/src/ujson/python/objToJSON.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index b81f12e392108..bd8d904456700 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -457,7 +457,7 @@ static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, size_t *len) { if (!PyDate_Check(obj)) { - PyErr_SetString(PyExc_TypeError, "Expected datetime object"); + PyErr_SetString(PyExc_TypeError, "Expected date object"); return NULL; } @@ -469,7 +469,7 @@ static npy_datetime PyDateTimeToEpoch(PyObject *obj, NPY_DATETIMEUNIT base) { npy_datetimestruct dts; int ret; - if (!PyDateTime_Check(obj)) { + if (!PyDate_Check(obj)) { // TODO: raise TypeError } PyDateTime_Date *dt = (PyDateTime_Date *)obj; From c102f5b9ed5913af9f4cceed3c216dca2a8d1993 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 15 Jan 2020 09:57:37 -0800 Subject: [PATCH 6/6] Refactor --- pandas/_libs/src/ujson/python/objToJSON.c | 121 +++++++++++++--------- 1 file changed, 71 insertions(+), 50 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index bd8d904456700..c5ac279ed3243 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -1504,6 +1504,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, char **ret; char *dataptr, *cLabel; int type_num; + NPY_DATETIMEUNIT base = enc->datetimeUnit; PRINTMARK(); if (!labels) { @@ -1541,35 +1542,10 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, break; } - if (PyObject_TypeCheck(item, cls_nat)) { - len = 5; // TODO: shouldn't require extra space for terminator - cLabel = PyObject_Malloc(len); - strncpy(cLabel, "null", len); - } else if (enc->datetimeIso && - (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { - PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); - if (td == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL); - Py_DECREF(td); - if (iso == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(iso); - Py_DECREF(iso); - len = strlen(cLabel); - } else if (PyTypeNum_ISDATETIME(type_num)) { - NPY_DATETIMEUNIT base = enc->datetimeUnit; - npy_int64 longVal; + int is_datetimelike = 0; + npy_int64 nanosecVal; + if (PyTypeNum_ISDATETIME(type_num)) { + is_datetimelike = 1; PyArray_VectorUnaryFunc *castfunc = PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64); if (!castfunc) { @@ -1577,33 +1553,74 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, "Cannot cast numpy dtype %d to long", enc->npyType); } - castfunc(dataptr, &longVal, 1, NULL, NULL); + castfunc(dataptr, &nanosecVal, 1, NULL, NULL); + } else if (PyDate_Check(item) || PyDelta_Check(item)) { + is_datetimelike = 1; + if (PyObject_HasAttrString(item, "value")) { + nanosecVal = get_long_attr(item, "value"); + } else { + if (PyDelta_Check(item)) { + nanosecVal = total_seconds(item) * + 1000000000LL; // nanoseconds per second + } else { + // datetime.* objects don't follow above rules + nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns); + } + } + } - if (longVal == get_nat()) { - PRINTMARK(); + if (is_datetimelike) { + if (nanosecVal == get_nat()) { len = 5; // TODO: shouldn't require extra space for terminator cLabel = PyObject_Malloc(len); strncpy(cLabel, "null", len); - } else if (enc->datetimeIso) { - cLabel = int64ToIso(longVal, base, &len); } else { - if (!scaleNanosecToUnit(&longVal, base)) { - // TODO: This gets hit but somehow doesn't cause errors - // need to clean up (elsewhere in module as well) + if (enc->datetimeIso) { + // TODO: Vectorized Timedelta function + if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) { + PyObject *td = + PyObject_CallFunction(cls_timedelta, "(O)", item); + if (td == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + PyObject *iso = + PyObject_CallMethod(td, "isoformat", NULL); + Py_DECREF(td); + if (iso == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + len = strlen(PyUnicode_AsUTF8(iso)); + cLabel = PyObject_Malloc(len + 1); + memcpy(cLabel, PyUnicode_AsUTF8(iso), len + 1); + Py_DECREF(iso); + } else { + if (type_num == NPY_DATETIME) { + cLabel = int64ToIso(nanosecVal, base, &len); + } else { + cLabel = PyDateTimeToIso((PyDateTime_Date *)item, + base, &len); + } + } + if (cLabel == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + } else { + cLabel = PyObject_Malloc(21); // 21 chars for int64 + sprintf(cLabel, "%" NPY_DATETIME_FMT, + NpyDateTimeToEpoch(nanosecVal, base)); + len = strlen(cLabel); } - cLabel = PyObject_Malloc(21); // 21 chars for int64 - sprintf(cLabel, "%" NPY_INT64_FMT, longVal); - len = strlen(cLabel); - } - } else if (PyDateTime_Check(item) || PyDate_Check(item)) { - NPY_DATETIMEUNIT base = enc->datetimeUnit; - if (enc->datetimeIso) { - cLabel = PyDateTimeToIso((PyDateTime_Date *)item, base, &len); - } else { - cLabel = PyObject_Malloc(21); // 21 chars for int64 - sprintf(cLabel, "%" NPY_DATETIME_FMT, - PyDateTimeToEpoch(item, base)); - len = strlen(cLabel); } } else { // Fallback to string representation PyObject *str = PyObject_Str(item); @@ -1624,6 +1641,10 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, ret[i] = PyObject_Malloc(len + 1); memcpy(ret[i], cLabel, len + 1); + if (is_datetimelike) { + PyObject_Free(cLabel); + } + if (PyErr_Occurred()) { NpyArr_freeLabels(ret, num); ret = 0;