Skip to content

Commit c520a51

Browse files
author
Marco Gorelli
committed
debug
1 parent 2958b9d commit c520a51

File tree

3 files changed

+124
-2
lines changed

3 files changed

+124
-2
lines changed

pandas/_libs/tslib.pyx

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,15 @@ cpdef array_to_datetime(
570570
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
571571

572572
elif is_integer_object(val) or is_float_object(val):
573+
if require_iso8601:
574+
if is_coerce:
575+
iresult[i] = NPY_NAT
576+
continue
577+
elif is_raise:
578+
raise ValueError(
579+
f"time data \"{val}\" at position {i} doesn't match format \"{format}\""
580+
)
581+
return values, tz_out
573582
# these must be ns unit by-definition
574583
seen_integer = True
575584

@@ -615,7 +624,7 @@ cpdef array_to_datetime(
615624
continue
616625
elif is_raise:
617626
raise ValueError(
618-
f"time data \"{val}\" at position {i} doesn't match format specified"
627+
f"time data \"{val}\" at position {i} doesn't match \"{format}\""
619628
)
620629
return values, tz_out
621630

@@ -646,6 +655,16 @@ cpdef array_to_datetime(
646655
_ts = convert_datetime_to_tsobject(py_dt, None)
647656
iresult[i] = _ts.value
648657
if not string_to_dts_failed:
658+
if require_iso8601:
659+
if is_coerce:
660+
iresult[i] = NPY_NAT
661+
continue
662+
elif is_raise:
663+
raise ValueError(
664+
f"time data \"{val}\" at position {i} doesn't "
665+
f"match format \"{format}\""
666+
)
667+
return values, tz_out
649668
# No error reported by string_to_dts, pick back up
650669
# where we left off
651670
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ This file implements string parsing and creation for NumPy datetime.
2929
#include <Python.h>
3030

3131
#include <time.h>
32+
#include <stdio.h>
3233

3334
#include <numpy/arrayobject.h>
3435
#include <numpy/arrayscalars.h>
@@ -68,22 +69,26 @@ This file implements string parsing and creation for NumPy datetime.
6869
*/
6970

7071
/*
 * Consume one expected character from the caller's `format` cursor.
 *
 * Must be expanded in a scope that provides `const char *format`,
 * `int format_len` and a `parse_error` label.
 *
 *   - When `format_len` is positive, the character at `*format` must equal
 *     `ch`: a mismatch jumps to `parse_error`, a match advances `format`
 *     by one character.
 *   - When `format_len` is not positive, the macro does nothing.
 *
 * `format_len` itself is never modified by this macro.
 *
 * The final line intentionally carries no trailing backslash, so the macro
 * definition cannot absorb whatever line happens to follow it.
 */
#define FORMAT_STARTSWITH(ch)        \
    if (format_len > 0) {            \
        if (*format != (ch)) {       \
            goto parse_error;        \
        }                            \
        ++format;                    \
    }
7578

7679
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
7780
npy_datetimestruct *out,
7881
NPY_DATETIMEUNIT *out_bestunit,
7982
int *out_local, int *out_tzoffset,
8083
const char* format, int exact) {
84+
printf("entering\n");
8185
int year_leap = 0;
8286
int i, numdigits;
8387
const char *substr;
8488
int sublen;
8589
NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC;
8690
int format_len = strlen(format);
91+
printf("format len: %d", format_len);
8792

8893
/* If year-month-day are separated by a valid separator,
8994
* months/days without leading zeroes will be parsed

pandas/tests/tools/test_to_datetime.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1751,6 +1751,104 @@ def test_to_datetime_iso8601(self, cache, arg, exp_str):
17511751
exp = Timestamp(exp_str)
17521752
assert result[0] == exp
17531753

1754+
@pytest.mark.parametrize(
    "input, format",
    [
        ("2012", "%Y-%m"),
        ("2012-01", "%Y-%m-%d"),
        ("2012-01-01", "%Y-%m-%d %H"),
        ("2012-01-01 10", "%Y-%m-%d %H:%M"),
        ("2012-01-01 10:00", "%Y-%m-%d %H:%M:%S"),
        (0, "%Y-%m-%d"),
    ],
)
@pytest.mark.parametrize("exact", [True, False])
def test_to_datetime_iso8601_fails(self, input, format, exact):
    # https://github.com/pandas-dev/pandas/issues/12649
    # Each input either has fewer components than `format` or is not a
    # string at all; parsing is expected to raise for both values of `exact`.
    # The raised message wraps the format in double quotes, so the pattern
    # has to contain those quotes too or it can never match.
    msg = (
        rf"time data \"{input}\" at position 0 "
        rf"doesn't match format \"{format}\""
    )
    with pytest.raises(ValueError, match=msg):
        to_datetime(input, format=format, exact=exact)
1773+
1774+
@pytest.mark.parametrize(
1775+
"input, format",
1776+
[
1777+
("2012-01-01", "%Y-%m"),
1778+
("2012-01-01 10", "%Y-%m-%d"),
1779+
("2012-01-01 10:00", "%Y-%m-%d %H"),
1780+
("2012-01-01 10:00:00", "%Y-%m-%d %H:%M"),
1781+
(0, "%Y-%m-%d"),
1782+
],
1783+
)
1784+
def test_to_datetime_iso8601_exact_fails(self, input, format):
1785+
# https://github.com/pandas-dev/pandas/issues/12649
1786+
with pytest.raises(
1787+
ValueError,
1788+
match=rf"time data \"{input}\" at position 0 doesn't match format {format}",
1789+
):
1790+
to_datetime(input, format=format)
1791+
1792+
@pytest.mark.parametrize(
1793+
"input, format",
1794+
[
1795+
("2012-01-01", "%Y-%m"),
1796+
("2012-01-01 00", "%Y-%m-%d"),
1797+
("2012-01-01 00:00", "%Y-%m-%d %H"),
1798+
("2012-01-01 00:00:00", "%Y-%m-%d %H:%M"),
1799+
],
1800+
)
1801+
def test_to_datetime_iso8601_non_exact(self, input, format):
1802+
# https://github.com/pandas-dev/pandas/issues/12649
1803+
expected = Timestamp(2012, 1, 1)
1804+
result = to_datetime(input, format=format, exact=False)
1805+
assert result == expected
1806+
1807+
@pytest.mark.parametrize(
1808+
"input, format",
1809+
[
1810+
("2020-01", "%Y/%m"),
1811+
("2020-01-01", "%Y/%m/%d"),
1812+
("2020-01-01 00", "%Y/%m/%dT%H"),
1813+
("2020-01-01T00", "%Y/%m/%d %H"),
1814+
("2020-01-01 00:00", "%Y/%m/%dT%H:%M"),
1815+
("2020-01-01T00:00", "%Y/%m/%d %H:%M"),
1816+
("2020-01-01 00:00:00", "%Y/%m/%dT%H:%M:%S"),
1817+
("2020-01-01T00:00:00", "%Y/%m/%d %H:%M:%S"),
1818+
],
1819+
)
1820+
def test_to_datetime_iso8601_separator(self, input, format):
1821+
# https://github.com/pandas-dev/pandas/issues/12649
1822+
with pytest.raises(
1823+
ValueError,
1824+
match=(
1825+
rf"time data \"{input}\" at position 0 doesn\'t match format {format}"
1826+
),
1827+
):
1828+
to_datetime(input, format=format)
1829+
1830+
@pytest.mark.parametrize(
1831+
"input, format",
1832+
[
1833+
("2020-01", "%Y-%m"),
1834+
("2020-01-01", "%Y-%m-%d"),
1835+
("2020-01-01 00", "%Y-%m-%d %H"),
1836+
("2020-01-01T00", "%Y-%m-%dT%H"),
1837+
("2020-01-01 00:00", "%Y-%m-%d %H:%M"),
1838+
("2020-01-01T00:00", "%Y-%m-%dT%H:%M"),
1839+
("2020-01-01 00:00:00", "%Y-%m-%d %H:%M:%S"),
1840+
("2020-01-01T00:00:00", "%Y-%m-%dT%H:%M:%S"),
1841+
("2020-01-01T00:00:00.000", "%Y-%m-%dT%H:%M:%S.%f"),
1842+
("2020-01-01T00:00:00.000000", "%Y-%m-%dT%H:%M:%S.%f"),
1843+
("2020-01-01T00:00:00.000000000", "%Y-%m-%dT%H:%M:%S.%f"),
1844+
],
1845+
)
1846+
def test_to_datetime_iso8601_valid(self, input, format):
1847+
# https://github.com/pandas-dev/pandas/issues/12649
1848+
expected = Timestamp(2020, 1, 1)
1849+
result = to_datetime(input, format=format)
1850+
assert result == expected
1851+
17541852
def test_to_datetime_default(self, cache):
17551853
rs = to_datetime("2001", cache=cache)
17561854
xp = datetime(2001, 1, 1)
@@ -2264,7 +2362,7 @@ def test_day_not_in_month_raise(self, cache):
22642362

22652363
@pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"])
def test_day_not_in_month_raise_value(self, cache, arg):
    # Each `arg` names a day that does not exist in its month; with
    # errors="raise" the call is expected to fail with a message that
    # quotes both the offending value and the format.
    expected_msg = (
        f'time data "{arg}" at position 0 doesn\'t match format \"%Y-%m-%d\"'
    )
    with pytest.raises(ValueError, match=expected_msg):
        to_datetime(arg, format="%Y-%m-%d", errors="raise", cache=cache)
22702368

0 commit comments

Comments
 (0)