Skip to content

Commit 2958b9d

Browse files
committed
initial format support
1 parent 1e5fee8 commit 2958b9d

File tree

9 files changed

+86
-11
lines changed

9 files changed

+86
-11
lines changed

pandas/_libs/tslib.pyx

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def _test_parse_iso8601(ts: str):
9393
elif ts == 'today':
9494
return Timestamp.now().normalize()
9595

96-
string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True)
96+
string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True, "", False)
9797
obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts)
9898
check_dts_bounds(&obj.dts)
9999
if out_local == 1:
@@ -449,6 +449,8 @@ cpdef array_to_datetime(
449449
bint utc=False,
450450
bint require_iso8601=False,
451451
bint allow_mixed=False,
452+
str format="",
453+
bint exact=False,
452454
):
453455
"""
454456
Converts a 1D array of date-like values to a numpy array of either:
@@ -598,7 +600,7 @@ cpdef array_to_datetime(
598600

599601
string_to_dts_failed = string_to_dts(
600602
val, &dts, &out_bestunit, &out_local,
601-
&out_tzoffset, False
603+
&out_tzoffset, False, format, exact
602604
)
603605
if string_to_dts_failed:
604606
# An error at this point is a _parsing_ error

pandas/_libs/tslibs/conversion.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -488,7 +488,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
488488
else:
489489
string_to_dts_failed = string_to_dts(
490490
ts, &dts, &out_bestunit, &out_local,
491-
&out_tzoffset, False
491+
&out_tzoffset, False, "", False
492492
)
493493
if not string_to_dts_failed:
494494
try:

pandas/_libs/tslibs/np_datetime.pxd

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,8 @@ cdef int string_to_dts(
9595
int* out_local,
9696
int* out_tzoffset,
9797
bint want_exc,
98+
str format,
99+
bint exact
98100
) except? -1
99101

100102
cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)

pandas/_libs/tslibs/np_datetime.pyx

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,8 @@ cdef extern from "src/datetime/np_datetime_strings.h":
5252
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
5353
npy_datetimestruct *out,
5454
NPY_DATETIMEUNIT *out_bestunit,
55-
int *out_local, int *out_tzoffset)
55+
int *out_local, int *out_tzoffset,
56+
const char *format, int exact)
5657

5758

5859
# ----------------------------------------------------------------------
@@ -273,14 +274,20 @@ cdef inline int string_to_dts(
273274
int* out_local,
274275
int* out_tzoffset,
275276
bint want_exc,
277+
str format,
278+
bint exact,
276279
) except? -1:
277280
cdef:
278281
Py_ssize_t length
279282
const char* buf
283+
Py_ssize_t format_length
284+
const char* format_buf
280285

281286
buf = get_c_string_buf_and_size(val, &length)
287+
format_buf = get_c_string_buf_and_size(format, &format_length)
282288
return parse_iso_8601_datetime(buf, length, want_exc,
283-
dts, out_bestunit, out_local, out_tzoffset)
289+
dts, out_bestunit, out_local, out_tzoffset,
290+
format_buf, exact)
284291

285292

286293
cpdef ndarray astype_overflowsafe(

pandas/_libs/tslibs/parsing.pyx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -409,7 +409,7 @@ cdef parse_datetime_string_with_reso(
409409
# TODO: does this render some/all of parse_delimited_date redundant?
410410
string_to_dts_failed = string_to_dts(
411411
date_string, &dts, &out_bestunit, &out_local,
412-
&out_tzoffset, False
412+
&out_tzoffset, False, "", False
413413
)
414414
if not string_to_dts_failed:
415415
if dts.ps != 0 or out_local:

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

Lines changed: 60 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,15 +66,24 @@ This file implements string parsing and creation for NumPy datetime.
6666
*
6767
* Returns 0 on success, -1 on failure.
6868
*/
69+
70+
#define FORMAT_STARTSWITH(ch) \
71+
if (*format != ch) { \
72+
goto parse_error; \
73+
} \
74+
++format; \
75+
6976
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
7077
npy_datetimestruct *out,
7178
NPY_DATETIMEUNIT *out_bestunit,
72-
int *out_local, int *out_tzoffset) {
79+
int *out_local, int *out_tzoffset,
80+
const char* format, int exact) {
7381
int year_leap = 0;
7482
int i, numdigits;
7583
const char *substr;
7684
int sublen;
7785
NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC;
86+
int format_len = strlen(format);
7887

7988
/* If year-month-day are separated by a valid separator,
8089
* months/days without leading zeroes will be parsed
@@ -104,14 +113,31 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
104113
while (sublen > 0 && isspace(*substr)) {
105114
++substr;
106115
--sublen;
116+
if (!isspace(*format)) {
117+
goto parse_error;
118+
}
119+
++format;
120+
if (!*format) {
121+
goto parse_error;
122+
}
107123
}
108124

109125
/* Leading '-' sign for negative year */
110126
if (*substr == '-') {
111127
++substr;
112128
--sublen;
129+
if (*format != '-') {
130+
goto parse_error;
131+
}
132+
++format;
133+
if (!*format) {
134+
goto parse_error;
135+
}
113136
}
114137

138+
FORMAT_STARTSWITH('%');
139+
FORMAT_STARTSWITH('Y');
140+
115141
if (sublen == 0) {
116142
goto parse_error;
117143
}
@@ -139,6 +165,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
139165
if (out_local != NULL) {
140166
*out_local = 0;
141167
}
168+
if (*format) {
169+
goto parse_error;
170+
}
142171
bestunit = NPY_FR_Y;
143172
goto finish;
144173
}
@@ -156,6 +185,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
156185
ymd_sep = valid_ymd_sep[i];
157186
++substr;
158187
--sublen;
188+
FORMAT_STARTSWITH(ymd_sep);
159189
/* Cannot have trailing separator */
160190
if (sublen == 0 || !isdigit(*substr)) {
161191
goto parse_error;
@@ -167,6 +197,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
167197
out->month = (*substr - '0');
168198
++substr;
169199
--sublen;
200+
FORMAT_STARTSWITH('%');
201+
FORMAT_STARTSWITH('m');
170202
/* Second digit optional if there was a separator */
171203
if (isdigit(*substr)) {
172204
out->month = 10 * out->month + (*substr - '0');
@@ -190,6 +222,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
190222
if (!has_ymd_sep) {
191223
goto parse_error;
192224
}
225+
if (*format) {
226+
goto parse_error;
227+
}
193228
if (out_local != NULL) {
194229
*out_local = 0;
195230
}
@@ -203,6 +238,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
203238
}
204239
++substr;
205240
--sublen;
241+
FORMAT_STARTSWITH(ymd_sep);
206242
}
207243

208244
/* PARSE THE DAY */
@@ -213,6 +249,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
213249
out->day = (*substr - '0');
214250
++substr;
215251
--sublen;
252+
FORMAT_STARTSWITH('%');
253+
FORMAT_STARTSWITH('d');
216254
/* Second digit optional if there was a separator */
217255
if (isdigit(*substr)) {
218256
out->day = 10 * out->day + (*substr - '0');
@@ -235,13 +273,17 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
235273
if (out_local != NULL) {
236274
*out_local = 0;
237275
}
276+
if (*format) {
277+
goto parse_error;
278+
}
238279
bestunit = NPY_FR_D;
239280
goto finish;
240281
}
241282

242283
if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
243284
goto parse_error;
244285
}
286+
FORMAT_STARTSWITH(*substr);
245287
++substr;
246288
--sublen;
247289

@@ -250,6 +292,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
250292
if (!isdigit(*substr)) {
251293
goto parse_error;
252294
}
295+
FORMAT_STARTSWITH('%');
296+
FORMAT_STARTSWITH('H');
253297
out->hour = (*substr - '0');
254298
++substr;
255299
--sublen;
@@ -274,6 +318,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
274318
if (!hour_was_2_digits) {
275319
goto parse_error;
276320
}
321+
if (*format) {
322+
goto parse_error;
323+
}
277324
bestunit = NPY_FR_h;
278325
goto finish;
279326
}
@@ -286,6 +333,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
286333
if (sublen == 0 || !isdigit(*substr)) {
287334
goto parse_error;
288335
}
336+
FORMAT_STARTSWITH(':');
289337
} else if (!isdigit(*substr)) {
290338
if (!hour_was_2_digits) {
291339
goto parse_error;
@@ -298,6 +346,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
298346
out->min = (*substr - '0');
299347
++substr;
300348
--sublen;
349+
FORMAT_STARTSWITH('%');
350+
FORMAT_STARTSWITH('M');
301351
/* Second digit optional if there was a separator */
302352
if (isdigit(*substr)) {
303353
out->min = 10 * out->min + (*substr - '0');
@@ -317,12 +367,16 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
317367

318368
if (sublen == 0) {
319369
bestunit = NPY_FR_m;
370+
if (*format) {
371+
goto parse_error;
372+
}
320373
goto finish;
321374
}
322375

323376
/* If we make it through this condition block, then the next
324377
* character is a digit. */
325378
if (has_hms_sep && *substr == ':') {
379+
FORMAT_STARTSWITH(':');
326380
++substr;
327381
--sublen;
328382
/* Cannot have a trailing ':' */
@@ -339,6 +393,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
339393
out->sec = (*substr - '0');
340394
++substr;
341395
--sublen;
396+
FORMAT_STARTSWITH('%');
397+
FORMAT_STARTSWITH('S');
342398
/* Second digit optional if there was a separator */
343399
if (isdigit(*substr)) {
344400
out->sec = 10 * out->sec + (*substr - '0');
@@ -360,12 +416,15 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
360416
if (sublen > 0 && *substr == '.') {
361417
++substr;
362418
--sublen;
419+
FORMAT_STARTSWITH('.');
363420
} else {
364421
bestunit = NPY_FR_s;
365422
goto parse_timezone;
366423
}
367424

368425
/* PARSE THE MICROSECONDS (0 to 6 digits) */
426+
FORMAT_STARTSWITH('%');
427+
FORMAT_STARTSWITH('f');
369428
numdigits = 0;
370429
for (i = 0; i < 6; ++i) {
371430
out->us *= 10;

pandas/_libs/tslibs/src/datetime/np_datetime_strings.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,9 @@ parse_iso_8601_datetime(const char *str, int len, int want_exc,
5858
npy_datetimestruct *out,
5959
NPY_DATETIMEUNIT *out_bestunit,
6060
int *out_local,
61-
int *out_tzoffset);
61+
int *out_tzoffset,
62+
const char* format,
63+
int exact);
6264

6365
/*
6466
* Provides a string length to use for converting datetime

pandas/core/arrays/datetimes.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2162,6 +2162,8 @@ def objects_to_datetime64ns(
21622162
require_iso8601: bool = False,
21632163
allow_object: bool = False,
21642164
allow_mixed: bool = False,
2165+
format: str = "",
2166+
exact: bool = False
21652167
):
21662168
"""
21672169
Convert data to array of timestamps.
@@ -2209,6 +2211,8 @@ def objects_to_datetime64ns(
22092211
yearfirst=yearfirst,
22102212
require_iso8601=require_iso8601,
22112213
allow_mixed=allow_mixed,
2214+
format=format,
2215+
exact=exact
22122216
)
22132217
result = result.reshape(data.shape, order=order)
22142218
except OverflowError as err:

pandas/core/tools/datetimes.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -426,16 +426,14 @@ def _convert_listlike_datetimes(
426426
format_is_iso8601 = format_is_iso(format)
427427
if format_is_iso8601:
428428
require_iso8601 = not infer_datetime_format
429-
format = None
430429

431-
if format is not None:
430+
if format is not None and not require_iso8601:
432431
res = _to_datetime_with_format(
433432
arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
434433
)
435434
if res is not None:
436435
return res
437436

438-
assert format is None or infer_datetime_format
439437
utc = tz == "utc"
440438
result, tz_parsed = objects_to_datetime64ns(
441439
arg,
@@ -445,6 +443,7 @@ def _convert_listlike_datetimes(
445443
errors=errors,
446444
require_iso8601=require_iso8601,
447445
allow_object=True,
446+
format=format, exact=False
448447
)
449448

450449
if tz_parsed is not None:

0 commit comments

Comments
 (0)