Skip to content

Commit 56bf5fd

Browse files
author
MarcoGorelli
committed
clarify pyarrow tests
1 parent 78fc36a commit 56bf5fd

File tree

3 files changed

+17
-7
lines changed

3 files changed

+17
-7
lines changed

pandas/_libs/tslibs/parsing.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,13 +1101,15 @@ cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
11011101
if (day_index > month_index) and dayfirst:
11021102
warnings.warn(
11031103
f"Parsing dates in {format} format when dayfirst=True was specified. "
1104-
"Pass `dayfirst=False` or `format='{format}'` to silence this warning.",
1104+
f"Pass `dayfirst=False` or explicitly specify a format to silence "
1105+
"this warning.",
11051106
stacklevel=find_stack_level(inspect.currentframe()),
11061107
)
11071108
if (day_index < month_index) and not dayfirst:
11081109
warnings.warn(
11091110
f"Parsing dates in {format} format when dayfirst=False was specified. "
1110-
"Pass `dayfirst=True` or `format='{format}'` to silence this warning.",
1111+
f"Pass `dayfirst=True` or explicitly specify a format to silence "
1112+
"this warning.",
11111113
stacklevel=find_stack_level(inspect.currentframe()),
11121114
)
11131115

pandas/io/parsers/base_parser.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1234,7 +1234,9 @@ def _isindex(colspec):
12341234
continue
12351235
# Pyarrow engine returns Series which we need to convert to
12361236
# numpy array before converter, it's a no-op for other parsers
1237-
data_dict[colspec] = converter(np.asarray(data_dict[colspec]))
1237+
inp = np.asarray(data_dict[colspec])
1238+
res = converter(inp)
1239+
data_dict[colspec] = res
12381240
else:
12391241
new_name, col, old_names = _try_convert_dates(
12401242
converter, colspec, data_dict, orig_names

pandas/tests/io/parser/test_parse_dates.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,7 +1245,10 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
12451245
parser = all_parsers
12461246
s = StringIO((f"{value},\n") * 50000)
12471247

1248-
if "PyArrowParser" in parser.__str__():
1248+
if parser.engine == "pyarrow":
1249+
# None in input gets converted to 'None', for which
1250+
# pandas tries to guess the datetime format, triggering
1251+
# the warning. TODO: parse dates directly in pyarrow, see
12491252
# https://github.com/pandas-dev/pandas/issues/48017
12501253
warn = UserWarning
12511254
else:
@@ -1266,12 +1269,15 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
12661269
@pytest.mark.parametrize("value", ["0"])
12671270
def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
12681271
# if we have an invalid date make sure that we handle this with
1269-
# and w/o the cache properly
1272+
# and w/o the cache properly. TODO: parse dates directly in pyarrow, see
1273+
# https://github.com/pandas-dev/pandas/issues/48017
1274+
12701275
parser = all_parsers
12711276
s = StringIO((f"{value},\n") * 50000)
12721277

1273-
if "PyArrowParser" in parser.__str__():
1274-
# https://github.com/pandas-dev/pandas/issues/48017
1278+
if parser.engine == "pyarrow":
1279+
# pyarrow reads "0" as 0 (of type int64), and so
1280+
# pandas doesn't try to guess the datetime format
12751281
warn = None
12761282
else:
12771283
warn = UserWarning

0 commit comments

Comments
 (0)