Skip to content

Commit 73a90f0

Browse files
author
MarcoGorelli
committed
wip
1 parent 638284c commit 73a90f0

File tree

2 files changed: 64 additions (+64) and 94 deletions (-94).

pandas/tests/io/parser/test_parse_dates.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1269,15 +1269,15 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
12691269
@pytest.mark.parametrize("value", ["0"])
12701270
def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
12711271
# if we have an invalid date make sure that we handle this with
1272-
# and w/o the cache properly. TODO: parse dates directly in pyarrow, see
1273-
# https://github.com/pandas-dev/pandas/issues/48017
1274-
1272+
# and w/o the cache properly.
12751273
parser = all_parsers
12761274
s = StringIO((f"{value},\n") * 50000)
12771275

12781276
if parser.engine == "pyarrow":
12791277
# pyarrow reads "0" as 0 (of type int64), and so
12801278
# pandas doesn't try to guess the datetime format
1279+
# TODO: parse dates directly in pyarrow, see
1280+
# https://github.com/pandas-dev/pandas/issues/48017
12811281
warn = None
12821282
else:
12831283
warn = UserWarning
@@ -1729,7 +1729,7 @@ def test_parse_delimited_date_swap_with_warning(
17291729
expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
17301730
warning_msg = (
17311731
"Parsing dates in .* format when dayfirst=.* was specified. "
1732-
"Pass `dayfirst=.*` or `format='.*'` to silence this warning."
1732+
"Pass `dayfirst=.*` or explicitly specify a format to silence this warning."
17331733
)
17341734
result = parser.read_csv_check_warnings(
17351735
UserWarning,
@@ -1988,7 +1988,7 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst):
19881988
)
19891989
warning_msg = (
19901990
"Parsing dates in .* format when dayfirst=.* was specified. "
1991-
"Pass `dayfirst=.*` or `format='.*'` to silence this warning."
1991+
"Pass `dayfirst=.*` or explicitly specify a format to silence this warning."
19921992
)
19931993
with tm.assert_produces_warning(UserWarning, match=warning_msg):
19941994
res = read_csv(

pandas/tests/tools/test_to_datetime.py

Lines changed: 59 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -227,19 +227,14 @@ def test_to_datetime_with_NA(self, data, format, expected):
227227
result = to_datetime(data, format=format)
228228
tm.assert_index_equal(result, expected)
229229

230-
@pytest.mark.parametrize(
231-
"data, format, expected",
232-
[
233-
(["201010", pd.NA], None, DatetimeIndex(["2010-10-20", "NaT"])),
234-
],
235-
)
236-
def test_to_datetime_with_NA_with_warning(self, data, format, expected):
230+
def test_to_datetime_with_NA_with_warning(self):
237231
# GH#42957
238232
with tm.assert_produces_warning(
239233
UserWarning,
240234
match="Could not infer format",
241235
):
242-
result = to_datetime(data, format=format)
236+
result = to_datetime(["201010", pd.NA])
237+
expected = DatetimeIndex(["2010-10-20", "NaT"])
243238
tm.assert_index_equal(result, expected)
244239

245240
def test_to_datetime_format_integer(self, cache):
@@ -356,7 +351,6 @@ def test_to_datetime_with_non_exact(self, cache):
356351
],
357352
)
358353
def test_parse_nanoseconds_with_formula_no_warning(self, cache, arg):
359-
360354
# GH8989
361355
# truncating the nanoseconds when a format was provided
362356
expected = to_datetime(arg, cache=cache)
@@ -372,7 +366,6 @@ def test_parse_nanoseconds_with_formula_no_warning(self, cache, arg):
372366
],
373367
)
374368
def test_parse_nanoseconds_with_formula_with_warning(self, cache, arg):
375-
376369
# GH8989
377370
# truncating the nanoseconds when a format was provided
378371
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
@@ -972,15 +965,13 @@ def test_datetime_invalid_datatype(self, arg):
972965

973966
@pytest.mark.parametrize("value", ["a", "00:01:99"])
974967
@pytest.mark.parametrize("infer", [True, False])
975-
@pytest.mark.parametrize("format", [None, "H%:M%:S%"])
976-
def test_datetime_invalid_scalar(self, value, format, infer):
968+
@pytest.mark.parametrize(
969+
"format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
970+
)
971+
def test_datetime_invalid_scalar(self, value, format, warning, infer):
977972
# GH24763
978-
if format is None:
979-
warn = UserWarning
980-
else:
981-
warn = None
982973
with tm.assert_produces_warning(
983-
warn,
974+
warning,
984975
match="Could not infer format",
985976
):
986977
res = to_datetime(
@@ -989,7 +980,7 @@ def test_datetime_invalid_scalar(self, value, format, infer):
989980
assert res == value
990981

991982
with tm.assert_produces_warning(
992-
warn,
983+
warning,
993984
match="Could not infer format",
994985
):
995986
res = to_datetime(
@@ -1003,22 +994,20 @@ def test_datetime_invalid_scalar(self, value, format, infer):
1003994
f"Given date string {value} not likely a datetime"
1004995
)
1005996
with pytest.raises(ValueError, match=msg):
1006-
with tm.assert_produces_warning(warn, match="Could not infer format"):
997+
with tm.assert_produces_warning(warning, match="Could not infer format"):
1007998
to_datetime(
1008999
value, errors="raise", format=format, infer_datetime_format=infer
10091000
)
10101001

10111002
@pytest.mark.parametrize("value", ["3000/12/11 00:00:00"])
10121003
@pytest.mark.parametrize("infer", [True, False])
1013-
@pytest.mark.parametrize("format", [None, "H%:M%:S%"])
1014-
def test_datetime_outofbounds_scalar(self, value, format, infer):
1004+
@pytest.mark.parametrize(
1005+
"format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
1006+
)
1007+
def test_datetime_outofbounds_scalar(self, value, format, warning, infer):
10151008
# GH24763
1016-
if format is None:
1017-
warn = UserWarning
1018-
else:
1019-
warn = None
10201009
with tm.assert_produces_warning(
1021-
warn,
1010+
warning,
10221011
match="Could not infer format",
10231012
):
10241013
res = to_datetime(
@@ -1027,7 +1016,7 @@ def test_datetime_outofbounds_scalar(self, value, format, infer):
10271016
assert res == value
10281017

10291018
with tm.assert_produces_warning(
1030-
warn,
1019+
warning,
10311020
match="Could not infer format",
10321021
):
10331022
res = to_datetime(
@@ -1042,31 +1031,26 @@ def test_datetime_outofbounds_scalar(self, value, format, infer):
10421031
value, errors="raise", format=format, infer_datetime_format=infer
10431032
)
10441033
else:
1045-
# TODO changes slightly, leave as GH comment
1046-
msg = r"^Out of bounds nanosecond timestamp: .*"
1047-
with pytest.raises(OutOfBoundsDatetime, match=msg):
1048-
with tm.assert_produces_warning(
1049-
warn,
1050-
match="Could not infer format",
1051-
):
1052-
to_datetime(
1053-
value,
1054-
errors="raise",
1055-
format=format,
1056-
infer_datetime_format=infer,
1057-
)
1034+
msg = "Out of bounds .* present at position 0"
1035+
with pytest.raises(
1036+
OutOfBoundsDatetime, match=msg
1037+
), tm.assert_produces_warning(
1038+
warning,
1039+
match="Could not infer format",
1040+
):
1041+
to_datetime(
1042+
value, errors="raise", format=format, infer_datetime_format=infer
1043+
)
10581044

10591045
@pytest.mark.parametrize("values", [["a"], ["00:01:99"], ["a", "b", "99:00:00"]])
10601046
@pytest.mark.parametrize("infer", [True, False])
1061-
@pytest.mark.parametrize("format", [None, "H%:M%:S%"])
1062-
def test_datetime_invalid_index(self, values, format, infer):
1047+
@pytest.mark.parametrize(
1048+
"format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
1049+
)
1050+
def test_datetime_invalid_index(self, values, format, warning, infer):
10631051
# GH24763
1064-
if format is None:
1065-
warn = UserWarning
1066-
else:
1067-
warn = None
10681052
with tm.assert_produces_warning(
1069-
warn,
1053+
warning,
10701054
match="Could not infer format",
10711055
):
10721056
res = to_datetime(
@@ -1075,7 +1059,7 @@ def test_datetime_invalid_index(self, values, format, infer):
10751059
tm.assert_index_equal(res, Index(values))
10761060

10771061
with tm.assert_produces_warning(
1078-
warn,
1062+
warning,
10791063
match="Could not infer format",
10801064
):
10811065
res = to_datetime(
@@ -1090,7 +1074,7 @@ def test_datetime_invalid_index(self, values, format, infer):
10901074
)
10911075
with pytest.raises(ValueError, match=msg):
10921076
with tm.assert_produces_warning(
1093-
warn,
1077+
warning,
10941078
match="Could not infer format",
10951079
):
10961080
to_datetime(
@@ -1199,7 +1183,9 @@ def test_to_datetime_converts_null_like_to_nat(self, cache, input, expected):
11991183
(Series([""] * 60), Series([NaT] * 60, dtype="datetime64[ns]")),
12001184
),
12011185
)
1202-
def test_to_datetime_converts_null_like_to_nat_warns(self, cache, input, expected):
1186+
def test_to_datetime_converts_null_like_to_nat_with_warning(
1187+
self, cache, input, expected
1188+
):
12031189
# GH35888
12041190
with tm.assert_produces_warning(
12051191
UserWarning,
@@ -1324,6 +1310,7 @@ def test_iso_8601_strings_with_different_offsets_utc(self):
13241310

13251311
def test_iso8601_strings_mixed_offsets_with_naive(self):
13261312
# GH 24992
1313+
# Can't parse consistently, need to parse each element in loop.
13271314
result = DatetimeIndex(
13281315
[
13291316
to_datetime(string, utc=True)
@@ -1350,6 +1337,7 @@ def test_iso8601_strings_mixed_offsets_with_naive(self):
13501337

13511338
def test_iso8601_strings_mixed_offsets_with_naive_reversed(self):
13521339
items = ["2018-11-28T00:00:00+12:00", "2018-11-28T00:00:00"]
1340+
# Can't parse consistently, need to parse each element in loop.
13531341
result = [to_datetime(item, utc=True) for item in items]
13541342
expected = [to_datetime(item, utc=True) for item in list(reversed(items))][::-1]
13551343
assert result == expected
@@ -1514,34 +1502,20 @@ def test_unit_with_numeric(self, cache, errors, dtype):
15141502
result = to_datetime(arr, errors=errors, cache=cache)
15151503
tm.assert_index_equal(result, expected)
15161504

1517-
@pytest.mark.parametrize(
1518-
"exp, arr",
1519-
[
1520-
[
1521-
["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"],
1522-
[1.434692e18, 1.432766e18, "foo", "NaT"],
1523-
],
1524-
],
1525-
)
1526-
def test_unit_with_numeric_coerce(self, cache, exp, arr):
1505+
def test_unit_with_numeric_coerce(self, cache):
15271506
# but we want to make sure that we are coercing
15281507
# if we have ints/strings
1508+
exp = ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"]
1509+
arr = [1.434692e18, 1.432766e18, "foo", "NaT"]
15291510
expected = DatetimeIndex(exp)
15301511
result = to_datetime(arr, errors="coerce", cache=cache)
15311512
tm.assert_index_equal(result, expected)
15321513

1533-
@pytest.mark.parametrize(
1534-
"exp, arr",
1535-
[
1536-
[
1537-
["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"],
1538-
["foo", 1.434692e18, 1.432766e18],
1539-
],
1540-
],
1541-
)
1542-
def test_unit_with_numeric_coerce_warns(self, cache, exp, arr):
1514+
def test_unit_with_numeric_coerce_with_warning(self, cache):
15431515
# but we want to make sure that we are coercing
15441516
# if we have ints/strings
1517+
exp = ["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"]
1518+
arr = ["foo", 1.434692e18, 1.432766e18]
15451519
expected = DatetimeIndex(exp)
15461520
with tm.assert_produces_warning(
15471521
UserWarning,
@@ -2300,13 +2274,17 @@ def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache):
23002274
)
23012275

23022276
@pytest.mark.parametrize(
2303-
"tz_name, offset", [("UTC", 0), ("UTC-3", 180), ("UTC+3", -180)]
2277+
"tz_name, offset, warning",
2278+
[("UTC", 0, None), ("UTC-3", 180, UserWarning), ("UTC+3", -180, UserWarning)],
23042279
)
2305-
@pytest.mark.xfail(reason="todo", strict=False)
2306-
def test_infer_datetime_format_tz_name(self, tz_name, offset):
2280+
def test_infer_datetime_format_tz_name(self, tz_name, offset, warning):
23072281
# GH 33133
23082282
ser = Series([f"2019-02-02 08:07:13 {tz_name}"])
2309-
result = to_datetime(ser, infer_datetime_format=True)
2283+
with tm.assert_produces_warning(
2284+
warning,
2285+
match="Could not infer format",
2286+
):
2287+
result = to_datetime(ser, infer_datetime_format=True)
23102288
expected = Series(
23112289
[Timestamp("2019-02-02 08:07:13").tz_localize(pytz.FixedOffset(offset))]
23122290
)
@@ -2347,25 +2325,17 @@ class TestDaysInMonth:
23472325
# tests for issue #10154
23482326

23492327
@pytest.mark.parametrize(
2350-
"arg, format",
2351-
[
2352-
["2015-02-29", "%Y-%m-%d"],
2353-
["2015-02-32", "%Y-%m-%d"],
2354-
["2015-04-31", "%Y-%m-%d"],
2355-
],
2356-
)
2357-
def test_day_not_in_month_coerce(self, cache, arg, format):
2358-
assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
2359-
2360-
@pytest.mark.parametrize(
2361-
"arg, format",
2328+
"arg, format, warning",
23622329
[
2363-
["2015-02-29", None],
2330+
["2015-02-29", None, UserWarning],
2331+
["2015-02-29", "%Y-%m-%d", None],
2332+
["2015-02-32", "%Y-%m-%d", None],
2333+
["2015-04-31", "%Y-%m-%d", None],
23642334
],
23652335
)
2366-
def test_day_not_in_month_coerce_with_warning(self, cache, arg, format):
2336+
def test_day_not_in_month_coerce(self, cache, arg, format, warning):
23672337
with tm.assert_produces_warning(
2368-
UserWarning,
2338+
warning,
23692339
match="Could not infer format",
23702340
):
23712341
assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))

0 commit comments

Comments (0)