Skip to content

REF: use datetime C API instead of getattrs #51368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions pandas/_libs/src/ujson/python/date_conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,14 @@ char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
npy_datetimestruct dts;
int ret;

ret = convert_pydatetime_to_datetimestruct(obj, &dts);
PyDateTime_IMPORT;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the CPython source, the datetime library seems to be implemented as a capsule. There's some documentation on that here:

https://docs.python.org/3/extending/extending.html#providing-a-c-api-for-an-extension-module

I really don't want to keep scattering these imports throughout the code base because it goes against the CPython documentation - can you check if the capsule approach would work?

if (!PyDate_Check(obj)) {
PyErr_SetString(PyExc_TypeError, "Expected date object");
return NULL;
}
PyDateTime_Date *dtobj = (PyDateTime_Date*)obj;

ret = convert_pydatetime_to_datetimestruct(dtobj, &dts);
if (ret != 0) {
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
Expand Down Expand Up @@ -121,7 +128,15 @@ npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
npy_datetimestruct dts;
int ret;

ret = convert_pydatetime_to_datetimestruct(dt, &dts);
PyDateTime_IMPORT;

if (!PyDate_Check(dt)) {
PyErr_SetString(PyExc_TypeError, "Expected date object");
return NULL;
}
PyDateTime_Date *dtobj = (PyDateTime_Date*)dt;

ret = convert_pydatetime_to_datetimestruct(dtobj, &dts);
if (ret != 0) {
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
Expand Down
35 changes: 14 additions & 21 deletions pandas/_libs/tslibs/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#include <numpy/ndarraytypes.h>
#include "np_datetime.h"

#include "datetime.h"

const npy_datetimestruct _AS_MIN_DTS = {
1969, 12, 31, 23, 59, 50, 776627, 963145, 224193};
Expand Down Expand Up @@ -370,39 +371,31 @@ PyObject *extract_utc_offset(PyObject *obj) {
* Returns -1 on error, 0 on success, and 1 (with no error set)
* if obj doesn't have the needed date or datetime attributes.
*/
int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
npy_datetimestruct *out) {
// Assumes that obj is a valid datetime object
PyObject *tmp;
PyObject *obj = (PyObject*)dtobj;

/* Initialize the output to all zeros */
memset(out, 0, sizeof(npy_datetimestruct));
out->month = 1;
out->day = 1;

out->year = PyLong_AsLong(PyObject_GetAttrString(obj, "year"));
out->month = PyLong_AsLong(PyObject_GetAttrString(obj, "month"));
out->day = PyLong_AsLong(PyObject_GetAttrString(obj, "day"));
PyDateTime_IMPORT;

// TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use
// PyDateTime_Check here, and less verbose attribute lookups.
out->year = PyDateTime_GET_YEAR(dtobj);
out->month = PyDateTime_GET_MONTH(dtobj);
out->day = PyDateTime_GET_DAY(dtobj);
out->hour = PyDateTime_DATE_GET_HOUR(dtobj);

/* Check for time attributes (if not there, return success as a date) */
if (!PyObject_HasAttrString(obj, "hour") ||
!PyObject_HasAttrString(obj, "minute") ||
!PyObject_HasAttrString(obj, "second") ||
!PyObject_HasAttrString(obj, "microsecond")) {
return 0;
}

out->hour = PyLong_AsLong(PyObject_GetAttrString(obj, "hour"));
out->min = PyLong_AsLong(PyObject_GetAttrString(obj, "minute"));
out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second"));
out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond"));
if (PyDateTime_Check(dtobj)) {
PyDateTime_DateTime* obj = (PyDateTime_DateTime*)dtobj;
out->min = PyDateTime_DATE_GET_MINUTE(obj);
out->sec = PyDateTime_DATE_GET_SECOND(obj);
out->us = PyDateTime_DATE_GET_MICROSECOND(obj);

if (PyObject_HasAttrString(obj, "tzinfo")) {
PyObject *offset = extract_utc_offset(obj);
// TODO(py3.10): in py3.10 we can use PyDateTime_DATE_GET_TZINFO
PyObject *offset = extract_utc_offset((PyObject*)obj);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this cast necessary?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without it I see a bunch of warnings:

warning: incompatible pointer types passing 'PyDateTime_DateTime *' to parameter of type 'PyObject *' (aka 'struct _object *') [-Wincompatible-pointer-types]
        PyObject *offset = extract_utc_offset(obj);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, makes sense.

/* Apply the time zone offset if datetime obj is tz-aware */
if (offset != NULL) {
if (offset == Py_None) {
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/src/datetime/np_datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt

#include <numpy/ndarraytypes.h>

#include "datetime.h"

typedef struct {
npy_int64 days;
npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds;
Expand Down Expand Up @@ -50,7 +52,7 @@ extern const npy_datetimestruct _M_MAX_DTS;

PyObject *extract_utc_offset(PyObject *obj);

int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
npy_datetimestruct *out);

npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
Expand Down