
Commit 5e5adc6

Merge branch 'main' into DEPR-to_datetime-mixed-offsets-with-utc=False
2 parents: c42f143 + edc0870

File tree

40 files changed: +419, −107 lines

.circleci/config.yml

Lines changed: 4 additions & 3 deletions
@@ -47,8 +47,8 @@ jobs:
       - run:
           name: Build aarch64 wheels
           command: |
-            pip3 install cibuildwheel==2.12.1
-            cibuildwheel --output-dir wheelhouse
+            pip3 install cibuildwheel==2.14.1
+            cibuildwheel --prerelease-pythons --output-dir wheelhouse
           environment:
             CIBW_BUILD: << parameters.cibw-build >>
@@ -91,4 +91,5 @@ workflows:
                 only: /^v.*/
           matrix:
             parameters:
-              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]
+              # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
+              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]#, "cp312-manylinux_aarch64"]

.github/workflows/unit-tests.yml

Lines changed: 10 additions & 6 deletions
@@ -311,12 +311,16 @@ jobs:
     # To freeze this file, uncomment out the ``if: false`` condition, and migrate the jobs
     # to the corresponding posix/windows-macos/sdist etc. workflows.
     # Feel free to modify this comment as necessary.
-    if: false # Uncomment this to freeze the workflow, comment it to unfreeze
+    #if: false # Uncomment this to freeze the workflow, comment it to unfreeze
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-22.04, macOS-latest, windows-latest]
+        # TODO: Disable macOS for now, Github Actions bug where python is not
+        # symlinked correctly to 3.12
+        # xref https://github.com/actions/setup-python/issues/701
+        #os: [ubuntu-22.04, macOS-latest, windows-latest]
+        os: [ubuntu-22.04, windows-latest]

     timeout-minutes: 180

@@ -340,21 +344,21 @@ jobs:
       - name: Set up Python Dev Version
         uses: actions/setup-python@v4
         with:
-          python-version: '3.11-dev'
+          python-version: '3.12-dev'

       - name: Install dependencies
        run: |
          python --version
-          python -m pip install --upgrade pip setuptools wheel
+          python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.0.1 meson-python==0.13.1
          python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
          python -m pip install git+https://github.com/nedbat/coveragepy.git
          python -m pip install versioneer[toml]
-          python -m pip install python-dateutil pytz cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
+          python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
          python -m pip list

      - name: Build Pandas
        run: |
-          python -m pip install -e . --no-build-isolation --no-index
+          python -m pip install -ve . --no-build-isolation --no-index

      - name: Build Version
        run: |

.github/workflows/wheels.yml

Lines changed: 3 additions & 1 deletion
@@ -93,7 +93,8 @@ jobs:
         - [macos-12, macosx_*]
         - [windows-2022, win_amd64]
         # TODO: support PyPy?
-        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]
+        # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
+        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]#, ["cp312", "3.12"]]
     env:
       IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
       IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -117,6 +118,7 @@ jobs:
         #with:
         #  package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
         env:
+          CIBW_PRERELEASE_PYTHONS: True
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

       - name: Set up Python

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 0 deletions
@@ -571,6 +571,7 @@ I/O
 - Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`)
 - Bug in :func:`read_xml` stripping whitespace in string data (:issue:`53811`)
 - Bug in :meth:`DataFrame.to_html` where ``colspace`` was incorrectly applied in case of multi index columns (:issue:`53885`)
+- Bug in :meth:`DataFrame.to_json` where :class:`DateTimeArray`/:class:`DateTimeIndex` with non nanosecond precision could not be serialized correctly (:issue:`53686`)
 - Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`)
 - Bug where ``bz2`` was treated as a hard requirement (:issue:`53857`)
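For reference, the user-visible effect of this whatsnew entry can be sketched roughly as follows (a minimal example, assuming a pandas build that includes this change; the data is illustrative only):

import pandas as pd

# Datetime data with microsecond (non-nanosecond) resolution; before this
# fix the JSON writer assumed nanoseconds and produced incorrect ISO strings
# for such values (GH 53686).
idx = pd.to_datetime(["2020-01-01", "2020-01-03"]).as_unit("us")
df = pd.DataFrame({"ts": idx})

# With the fix, the non-nano values serialize to the expected ISO-8601 text.
print(df.to_json(date_format="iso"))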

meson.build

Lines changed: 7 additions & 0 deletions
@@ -27,6 +27,13 @@ versioneer = files('generate_version.py')
 add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language : 'c')
 add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language : 'cpp')

+# Allow supporting older numpys than the version compiled against
+# Set the define to the min supported version of numpy for pandas
+# e.g. right now this is targeting numpy 1.21+
+add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'c')
+add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'cpp')
+
 if fs.exists('_version_meson.py')
   py.install_sources('_version_meson.py', pure: false, subdir: 'pandas')
 else

pandas/_libs/include/pandas/datetime/date_conversions.h

Lines changed: 4 additions & 1 deletion
@@ -18,7 +18,10 @@ int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit);
 // up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z
 // while base="ns" yields "2020-01-01T00:00:00.000000000Z"
 // len is mutated to save the length of the returned string
-char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len);
+char *int64ToIso(int64_t value,
+                 NPY_DATETIMEUNIT valueUnit,
+                 NPY_DATETIMEUNIT base,
+                 size_t *len);

 // TODO(username): this function doesn't do a lot; should augment or
 // replace with scaleNanosecToUnit

pandas/_libs/include/pandas/datetime/pd_datetime.h

Lines changed: 3 additions & 3 deletions
@@ -34,7 +34,7 @@ typedef struct {
   npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT,
                                                  const npy_datetimestruct *);
   int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT);
-  char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, size_t *);
+  char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, NPY_DATETIMEUNIT, size_t *);
   npy_datetime (*NpyDateTimeToEpoch)(npy_datetime, NPY_DATETIMEUNIT);
   char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *);
   npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT);
@@ -73,8 +73,8 @@ static PandasDateTime_CAPI *PandasDateTimeAPI = NULL;
                                                  (npy_datetimestruct))
 #define scaleNanosecToUnit(value, unit) \
   PandasDateTimeAPI->scaleNanosecToUnit((value), (unit))
-#define int64ToIso(value, base, len) \
-  PandasDateTimeAPI->int64ToIso((value), (base), (len))
+#define int64ToIso(value, valueUnit, base, len) \
+  PandasDateTimeAPI->int64ToIso((value), (valueUnit), (base), (len))
 #define NpyDateTimeToEpoch(dt, base) \
   PandasDateTimeAPI->NpyDateTimeToEpoch((dt), (base))
 #define PyDateTimeToIso(obj, base, len) \

pandas/_libs/src/datetime/date_conversions.c

Lines changed: 5 additions & 2 deletions
@@ -41,11 +41,14 @@ int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) {
 }

 /* Converts the int64_t representation of a datetime to ISO; mutates len */
-char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) {
+char *int64ToIso(int64_t value,
+                 NPY_DATETIMEUNIT valueUnit,
+                 NPY_DATETIMEUNIT base,
+                 size_t *len) {
   npy_datetimestruct dts;
   int ret_code;

-  pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts);
+  pandas_datetime_to_datetimestruct(value, valueUnit, &dts);

   *len = (size_t)get_datetime_iso_8601_strlen(0, base);
   char *result = PyObject_Malloc(*len);

pandas/_libs/src/vendored/ujson/python/objToJSON.c

Lines changed: 37 additions & 13 deletions
@@ -131,6 +131,7 @@ typedef struct __PyObjectEncoder {

   int datetimeIso;
   NPY_DATETIMEUNIT datetimeUnit;
+  NPY_DATETIMEUNIT valueUnit;

   // output format style for pandas data types
   int outputFormat;
@@ -350,7 +351,8 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
 static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
                                       JSONTypeContext *tc, size_t *len) {
   NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
-  GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, base, len);
+  NPY_DATETIMEUNIT valueUnit = ((PyObjectEncoder *)tc->encoder)->valueUnit;
+  GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, valueUnit, base, len);
   return GET_TC(tc)->cStr;
 }

@@ -364,8 +366,9 @@ static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
 /* JSON callback */
 static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
                                      size_t *len) {
-  if (!PyDate_Check(obj)) {
-    PyErr_SetString(PyExc_TypeError, "Expected date object");
+  if (!PyDate_Check(obj) && !PyDateTime_Check(obj)) {
+    PyErr_SetString(PyExc_TypeError, "Expected date or datetime object");
+    ((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
     return NULL;
   }

@@ -502,6 +505,10 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
       GET_TC(tc)->itemValue = obj;
       Py_INCREF(obj);
       ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array);
+      // Also write the resolution (unit) of the ndarray
+      PyArray_Descr *dtype = PyArray_DESCR(npyarr->array);
+      ((PyObjectEncoder *)tc->encoder)->valueUnit =
+          get_datetime_metadata_from_dtype(dtype).base;
       ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
       ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
     } else {
@@ -1255,6 +1262,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
   char **ret;
   char *dataptr, *cLabel;
   int type_num;
+  PyArray_Descr *dtype;
   NPY_DATETIMEUNIT base = enc->datetimeUnit;

   if (!labels) {
@@ -1283,6 +1291,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
   stride = PyArray_STRIDE(labels, 0);
   dataptr = PyArray_DATA(labels);
   type_num = PyArray_TYPE(labels);
+  dtype = PyArray_DESCR(labels);

   for (i = 0; i < num; i++) {
     item = PyArray_GETITEM(labels, dataptr);
@@ -1293,7 +1302,8 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
     }

     int is_datetimelike = 0;
-    npy_int64 nanosecVal;
+    npy_int64 i8date;
+    NPY_DATETIMEUNIT dateUnit = NPY_FR_ns;
     if (PyTypeNum_ISDATETIME(type_num)) {
       is_datetimelike = 1;
       PyArray_VectorUnaryFunc *castfunc =
@@ -1303,35 +1313,37 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
                      "Cannot cast numpy dtype %d to long",
                      enc->npyType);
       }
-      castfunc(dataptr, &nanosecVal, 1, NULL, NULL);
+      castfunc(dataptr, &i8date, 1, NULL, NULL);
+      dateUnit = get_datetime_metadata_from_dtype(dtype).base;
     } else if (PyDate_Check(item) || PyDelta_Check(item)) {
       is_datetimelike = 1;
       if (PyObject_HasAttrString(item, "_value")) {
         // see test_date_index_and_values for case with non-nano
-        nanosecVal = get_long_attr(item, "_value");
+        i8date = get_long_attr(item, "_value");
       } else {
         if (PyDelta_Check(item)) {
-          nanosecVal = total_seconds(item) *
+          i8date = total_seconds(item) *
                        1000000000LL; // nanoseconds per second
         } else {
           // datetime.* objects don't follow above rules
-          nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns);
+          i8date = PyDateTimeToEpoch(item, NPY_FR_ns);
         }
       }
     }

     if (is_datetimelike) {
-      if (nanosecVal == get_nat()) {
+      if (i8date == get_nat()) {
         len = 4;
         cLabel = PyObject_Malloc(len + 1);
         strncpy(cLabel, "null", len + 1);
       } else {
         if (enc->datetimeIso) {
           if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) {
-            cLabel = int64ToIsoDuration(nanosecVal, &len);
+            // TODO(username): non-nano timedelta support?
+            cLabel = int64ToIsoDuration(i8date, &len);
           } else {
             if (type_num == NPY_DATETIME) {
-              cLabel = int64ToIso(nanosecVal, base, &len);
+              cLabel = int64ToIso(i8date, dateUnit, base, &len);
             } else {
               cLabel = PyDateTimeToIso(item, base, &len);
             }
@@ -1346,7 +1358,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
           int size_of_cLabel = 21; // 21 chars for int 64
           cLabel = PyObject_Malloc(size_of_cLabel);
           snprintf(cLabel, size_of_cLabel, "%" NPY_DATETIME_FMT,
-                   NpyDateTimeToEpoch(nanosecVal, base));
+                   NpyDateTimeToEpoch(i8date, base));
           len = strlen(cLabel);
         }
       }
@@ -1538,13 +1550,25 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
     tc->type = JT_UTF8;
     return;
   } else if (PyArray_IsScalar(obj, Datetime)) {
+    npy_int64 longVal;
     if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) {
       tc->type = JT_NULL;
       return;
     }
+    PyArray_Descr *dtype = PyArray_DescrFromScalar(obj);
+    if (!PyTypeNum_ISDATETIME(dtype->type_num)) {
+      PyErr_Format(PyExc_ValueError, "Could not get resolution of datetime");
+      return;
+    }
+
+    PyArray_Descr *outcode = PyArray_DescrFromType(NPY_INT64);
+    PyArray_CastScalarToCtype(obj, &longVal, outcode);
+    Py_DECREF(outcode);

     if (enc->datetimeIso) {
-      pc->PyTypeToUTF8 = PyDateTimeToIsoCallback;
+      GET_TC(tc)->longValue = longVal;
+      pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback;
+      enc->valueUnit = get_datetime_metadata_from_dtype(dtype).base;
       tc->type = JT_UTF8;
     } else {
       NPY_DATETIMEUNIT base =
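The NpyArr_encodeLabels changes above handle the related case where non-nanosecond datetimes appear as axis labels rather than values; a rough Python-level sketch of that path (illustrative only, assuming a build that includes this change):

import pandas as pd

# Second-resolution datetimes used as the index: label encoding now passes
# the array's own unit to int64ToIso instead of hard-coding nanoseconds.
idx = pd.DatetimeIndex(["2020-01-01", "2020-01-02"]).as_unit("s")
ser = pd.Series([1, 2], index=idx)

# orient="index" sends the datetime labels through the label-encoding path.
print(ser.to_json(date_format="iso", orient="index"))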

pandas/compat/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -19,6 +19,7 @@
     ISMUSL,
     PY310,
     PY311,
+    PY312,
     PYPY,
 )
 import pandas.compat.compressors
@@ -189,5 +190,6 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
     "ISMUSL",
     "PY310",
     "PY311",
+    "PY312",
     "PYPY",
 ]

pandas/compat/_constants.py

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,7 @@

 PY310 = sys.version_info >= (3, 10)
 PY311 = sys.version_info >= (3, 11)
+PY312 = sys.version_info >= (3, 12)
 PYPY = platform.python_implementation() == "PyPy"
 ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
 REF_COUNT = 2 if PY311 else 3
@@ -24,5 +25,6 @@
     "ISMUSL",
     "PY310",
     "PY311",
+    "PY312",
     "PYPY",
 ]
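As context, the new flag mirrors the existing version constants and is just a boolean evaluated at import time (a trivial sketch, not part of the diff itself):

import sys
from pandas.compat import PY312

# PY312 is equivalent to checking the interpreter version directly.
assert PY312 == (sys.version_info >= (3, 12))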

pandas/core/arrays/_mixins.py

Lines changed: 14 additions & 4 deletions
@@ -296,7 +296,9 @@ def _fill_mask_inplace(
         func(self._ndarray.T, limit=limit, mask=mask.T)

     @doc(ExtensionArray.fillna)
-    def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
+    def fillna(
+        self, value=None, method=None, limit: int | None = None, copy: bool = True
+    ) -> Self:
         value, method = validate_fillna_kwargs(
             value, method, validate_scalar_dict_value=False
         )
@@ -313,7 +315,9 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
                 # TODO: check value is None
                 # (for now) when self.ndim == 2, we assume axis=0
                 func = missing.get_fill_func(method, ndim=self.ndim)
-                npvalues = self._ndarray.T.copy()
+                npvalues = self._ndarray.T
+                if copy:
+                    npvalues = npvalues.copy()
                 func(npvalues, limit=limit, mask=mask.T)
                 npvalues = npvalues.T

@@ -322,14 +326,20 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
                 new_values = self._from_backing_data(npvalues)
             else:
                 # fill with value
-                new_values = self.copy()
+                if copy:
+                    new_values = self.copy()
+                else:
+                    new_values = self[:]
                 new_values[mask] = value
         else:
             # We validate the fill_value even if there is nothing to fill
             if value is not None:
                 self._validate_setitem_value(value)

-            new_values = self.copy()
+            if not copy:
+                new_values = self[:]
+            else:
+                new_values = self.copy()
         return new_values

     # ------------------------------------------------------------------------
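For orientation, the new copy keyword threads through both the method-based and value-based fill branches above; a minimal sketch of the intended array-level semantics (internal API, assuming this branch; values illustrative):

import pandas as pd

# An NDArray-backed extension array (DatetimeArray) with a missing value.
arr = pd.array([pd.Timestamp("2020-01-01"), pd.NaT], dtype="datetime64[ns]")

# copy=True (default) fills into a fresh copy; copy=False may reuse the
# existing backing data via self[:] instead of copying first.
filled = arr.fillna(value=pd.Timestamp("2020-01-02"), copy=False)
print(filled)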
