|
1 | 1 | from __future__ import annotations
|
2 | 2 |
|
3 | 3 | from typing import TYPE_CHECKING
|
| 4 | +import warnings |
4 | 5 |
|
5 | 6 | from pandas._config import using_pyarrow_string_dtype
|
6 | 7 |
|
7 | 8 | from pandas._libs import lib
|
| 9 | +from pandas._libs.parsers import ParserWarning |
8 | 10 | from pandas.compat._optional import import_optional_dependency
|
| 11 | +from pandas.util._exceptions import find_stack_level |
9 | 12 |
|
10 | 13 | from pandas.core.dtypes.inference import is_integer
|
11 | 14 |
|
@@ -85,6 +88,29 @@ def _get_pyarrow_options(self) -> None:
|
85 | 88 | and option_name
|
86 | 89 | in ("delimiter", "quote_char", "escape_char", "ignore_empty_lines")
|
87 | 90 | }
|
| 91 | + |
| 92 | + if "on_bad_lines" in self.kwds: |
| 93 | + if callable(self.kwds["on_bad_lines"]): |
| 94 | + self.parse_options["invalid_row_handler"] = self.kwds["on_bad_lines"] |
| 95 | + elif self.kwds["on_bad_lines"] == ParserBase.BadLineHandleMethod.ERROR: |
| 96 | + self.parse_options[ |
| 97 | + "invalid_row_handler" |
| 98 | + ] = None # PyArrow raises an exception by default |
| 99 | + elif self.kwds["on_bad_lines"] == ParserBase.BadLineHandleMethod.WARN: |
| 100 | + |
| 101 | + def handle_warning(invalid_row): |
| 102 | + warnings.warn( |
| 103 | + f"Expected {invalid_row.expected_columns} columns, but found " |
| 104 | + f"{invalid_row.actual_columns}: {invalid_row.text}", |
| 105 | + ParserWarning, |
| 106 | + stacklevel=find_stack_level(), |
| 107 | + ) |
| 108 | + return "skip" |
| 109 | + |
| 110 | + self.parse_options["invalid_row_handler"] = handle_warning |
| 111 | + elif self.kwds["on_bad_lines"] == ParserBase.BadLineHandleMethod.SKIP: |
| 112 | + self.parse_options["invalid_row_handler"] = lambda _: "skip" |
| 113 | + |
88 | 114 | self.convert_options = {
|
89 | 115 | option_name: option_value
|
90 | 116 | for option_name, option_value in self.kwds.items()
|
|
0 commit comments