Skip to content

BUG: to_json incorrectly localizes tz-naive datetimes to UTC #46730

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
May 4, 2022
31 changes: 31 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,36 @@ to UTC. (:issue:`38760`)
Note that this patch does not fix the localization of tz-aware Timestamps to UTC
upon serialization. (Related issue :issue:`12997`)

*Old Behavior*

.. ipython:: python

index = pd.date_range(
start='2020-12-28 00:00:00',
end='2020-12-28 02:00:00',
freq='1H',
)
a = pd.Series(
data=range(3),
index=index,
)

.. code-block:: ipython

In [4]: a.to_json(date_format='iso')
Out[4]: '{"2020-12-28T00:00:00.000Z":0,"2020-12-28T01:00:00.000Z":1,"2020-12-28T02:00:00.000Z":2}'

In [5]: pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
Out[5]: array([False, False, False])

*New Behavior*

.. ipython:: python

a.to_json(date_format='iso')
# Roundtripping now works
pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index

.. ---------------------------------------------------------------------------
.. _whatsnew_150.api_breaking:

Expand Down Expand Up @@ -582,6 +612,7 @@ I/O
- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
- :meth:`DataFrame.to_html` now excludes the ``border`` attribute from ``<table>`` elements when the ``border`` keyword is set to ``False``.
-

Period
Expand Down
12 changes: 8 additions & 4 deletions pandas/_libs/src/ujson/python/date_conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,14 @@ char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
// Check to see if PyDateTime has a timezone.
// Don't convert to UTC if it doesn't.
int is_tz_aware = 0;
PyObject *offset = extract_utc_offset(obj);
if (offset != NULL) {
is_tz_aware = 1;
Py_DECREF(offset);
if (PyObject_HasAttrString(obj, "tzinfo")) {
PyObject *offset = extract_utc_offset(obj);
if (offset != NULL) {
if (offset != Py_None) {
is_tz_aware = 1;
}
Py_DECREF(offset);
}
}
ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base);

Expand Down
97 changes: 51 additions & 46 deletions pandas/_libs/tslibs/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,32 +332,31 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
return 0;
}
/*
* Returns the offset from utc of the timezone.
* If the passed object is timezone naive, or if extraction
* of the offset fails, NULL is returned.
* Returns the UTC offset of the object's timezone as a timedelta.
* The caller is responsible for ensuring that the tzinfo
* attribute exists on the datetime object.
*
* If the passed object is timezone naive, Py_None is returned.
* If extraction of the offset fails, NULL is returned.
*
* NOTE: This function is not vendored from numpy.
*/
PyObject *extract_utc_offset(PyObject *obj) {
if (PyObject_HasAttrString(obj, "tzinfo")) {
PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo");
if (tmp == NULL) {
PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo");
if (tmp == NULL) {
return NULL;
}
if (tmp != Py_None) {
PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj);
if (offset == NULL) {
Py_DECREF(tmp);
return NULL;
}
if (tmp != Py_None) {
PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj);
if (offset == NULL) {
Py_DECREF(tmp);
return NULL;
}
if (offset != Py_None) {
return offset;
}
Py_DECREF(offset);
if (offset != Py_None) {
return offset;
}
Py_DECREF(tmp);
}
return NULL;
return tmp;
}

/*
Expand Down Expand Up @@ -404,38 +403,44 @@ int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second"));
out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond"));

PyObject *offset = extract_utc_offset(obj);
/* Apply the time zone offset if datetime obj is tz-aware */
if (offset != NULL) {
PyObject *tmp_int;
int seconds_offset, minutes_offset;
/*
* The timedelta should have a function "total_seconds"
* which contains the value we want.
*/
tmp = PyObject_CallMethod(offset, "total_seconds", "");
Py_DECREF(offset);
if (tmp == NULL) {
return -1;
}
tmp_int = PyNumber_Long(tmp);
if (tmp_int == NULL) {
Py_DECREF(tmp);
return -1;
}
seconds_offset = PyLong_AsLong(tmp_int);
if (seconds_offset == -1 && PyErr_Occurred()) {
if (PyObject_HasAttrString(obj, "tzinfo")) {
PyObject *offset = extract_utc_offset(obj);
/* Apply the time zone offset if datetime obj is tz-aware */
if (offset != NULL) {
if (offset == Py_None) {
Py_DECREF(offset);
return 0;
}
PyObject *tmp_int;
int seconds_offset, minutes_offset;
/*
* The timedelta should have a function "total_seconds"
* which contains the value we want.
*/
tmp = PyObject_CallMethod(offset, "total_seconds", "");
Py_DECREF(offset);
if (tmp == NULL) {
return -1;
}
tmp_int = PyNumber_Long(tmp);
if (tmp_int == NULL) {
Py_DECREF(tmp);
return -1;
}
seconds_offset = PyLong_AsLong(tmp_int);
if (seconds_offset == -1 && PyErr_Occurred()) {
Py_DECREF(tmp_int);
Py_DECREF(tmp);
return -1;
}
Py_DECREF(tmp_int);
Py_DECREF(tmp);
return -1;
}
Py_DECREF(tmp_int);
Py_DECREF(tmp);

/* Convert to a minutes offset and apply it */
minutes_offset = seconds_offset / 60;
/* Convert to a minutes offset and apply it */
minutes_offset = seconds_offset / 60;

add_minutes_to_datetimestruct(out, -minutes_offset);
add_minutes_to_datetimestruct(out, -minutes_offset);
}
}

return 0;
Expand Down