diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 70b07e08cf760..7c2a1199bdf0e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -220,6 +220,7 @@ I/O - Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`) - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`) - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`) +- Allow custom error values for the ``parse_dates`` argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`) - Period diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5678133d5a706..b7efb4a8d6947 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -79,7 +79,12 @@ def _process_parse_dates_argument(parse_dates): def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): - return to_datetime(col, errors="ignore", **format) + # GH35185 Allow custom error values in parse_dates argument of + # read_sql like functions. 
+ # Format can take on custom to_datetime argument values such as + # {"errors": "coerce"} or {"dayfirst": True} + error = format.pop("errors", None) or "ignore" + return to_datetime(col, errors=error, **format) else: # Allow passing of formatting string for integers # GH17855 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0195b61d13798..497039de99196 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -369,6 +369,54 @@ def _load_test3_data(self): self.test_frame3 = DataFrame(data, columns=columns) + def _load_types_test_data(self, data): + def _filter_to_flavor(flavor, df): + flavor_dtypes = { + "sqlite": { + "TextCol": "str", + "DateCol": "str", + "IntDateCol": "int64", + "IntDateOnlyCol": "int64", + "FloatCol": "float", + "IntCol": "int64", + "BoolCol": "int64", + "IntColWithNull": "float", + "BoolColWithNull": "float", + }, + "mysql": { + "TextCol": "str", + "DateCol": "str", + "IntDateCol": "int64", + "IntDateOnlyCol": "int64", + "FloatCol": "float", + "IntCol": "int64", + "BoolCol": "bool", + "IntColWithNull": "float", + "BoolColWithNull": "float", + }, + "postgresql": { + "TextCol": "str", + "DateCol": "str", + "DateColWithTz": "str", + "IntDateCol": "int64", + "IntDateOnlyCol": "int64", + "FloatCol": "float", + "IntCol": "int64", + "BoolCol": "bool", + "IntColWithNull": "float", + "BoolColWithNull": "float", + }, + } + + dtypes = flavor_dtypes[flavor] + return df[dtypes.keys()].astype(dtypes) + + df = DataFrame(data) + self.types_test = { + flavor: _filter_to_flavor(flavor, df) + for flavor in ("sqlite", "mysql", "postgresql") + } + def _load_raw_sql(self): self.drop_table("types_test_data") self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor]) @@ -405,6 +453,8 @@ def _load_raw_sql(self): ins["query"], [d[field] for field in ins["fields"]] ) + self._load_types_test_data(data) + def _count_rows(self, table_name): result = ( self._get_exec() @@ -741,6 +791,36 @@ def 
@pytest.mark.parametrize("error", ["ignore", "raise", "coerce"])
@pytest.mark.parametrize(
    "read_sql, text, mode",
    [
        (sql.read_sql, "SELECT * FROM types_test_data", ("sqlalchemy", "fallback")),
        (sql.read_sql, "types_test_data", ("sqlalchemy",)),
        (
            sql.read_sql_query,
            "SELECT * FROM types_test_data",
            ("sqlalchemy", "fallback"),
        ),
        (sql.read_sql_table, "types_test_data", ("sqlalchemy",)),
    ],
)
def test_custom_dateparsing_error(self, read_sql, text, mode, error):
    # GH35185: a dict given for a column in ``parse_dates`` may carry an
    # "errors" entry; check every accepted value against each reader.
    #
    # ``mode`` is the tuple of connection modes a case applies to.  It must
    # be a real tuple: a bare ("sqlalchemy") is just a string, which would
    # turn the membership check below into a substring match.
    if self.mode not in mode:
        # This reader/text combination is not exercised for the current
        # connection mode, so there is nothing to compare.
        return

    # "DateCol" is expected to come back as datetime64[ns]; every other
    # column keeps the dtype recorded in self.types_test for this flavor.
    expected = self.types_test[self.flavor].astype({"DateCol": "datetime64[ns]"})

    result = read_sql(
        text,
        con=self.conn,
        parse_dates={
            "DateCol": {"errors": error},
        },
    )

    tm.assert_frame_equal(result, expected)