diff --git a/ci/requirements-3.7-0.24.2.conda b/ci/requirements-3.7-0.24.2.conda
index 82f4e7b9..e0323d92 100644
--- a/ci/requirements-3.7-0.24.2.conda
+++ b/ci/requirements-3.7-0.24.2.conda
@@ -1,6 +1,6 @@
 codecov
 coverage
-db-dtypes==0.3.0
+db-dtypes==0.3.1
 fastavro
 flake8
 numpy==1.16.6
diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py
index 5422402e..315ad5cd 100644
--- a/pandas_gbq/load.py
+++ b/pandas_gbq/load.py
@@ -90,12 +90,13 @@ def cast_dataframe_for_parquet(
             # Use extension dtype first so that it uses the correct equality operator.
             and db_dtypes.DateDtype() != dataframe[column_name].dtype
         ):
-            # Construct converted column manually, because I can't use
-            # .astype() with DateDtype. With .astype(), I get the error:
-            #
-            # TypeError: Cannot interpret '' as a data type
-            cast_column = pandas.Series(
-                dataframe[column_name], dtype=db_dtypes.DateDtype()
+            cast_column = dataframe[column_name].astype(
+                dtype=db_dtypes.DateDtype(),
+                # Return the original column if there was an error converting
+                # to the dtype, such as if there is a date outside the
+                # supported range.
+                # https://github.com/googleapis/python-bigquery-pandas/issues/441
+                errors="ignore",
             )
         elif column_type in {"NUMERIC", "DECIMAL", "BIGNUMERIC", "BIGDECIMAL"}:
             cast_column = dataframe[column_name].map(decimal.Decimal)
diff --git a/setup.py b/setup.py
index 28c81eee..283e5ea8 100644
--- a/setup.py
+++ b/setup.py
@@ -23,16 +23,16 @@ release_status = "Development Status :: 4 - Beta"
 dependencies = [
     "setuptools",
-    "db-dtypes >=0.3.0,<2.0.0",
-    "numpy>=1.16.6",
-    "pandas>=0.24.2",
+    "db-dtypes >=0.3.1,<2.0.0",
+    "numpy >=1.16.6",
+    "pandas >=0.24.2",
     "pyarrow >=3.0.0, <7.0dev",
     "pydata-google-auth",
     "google-auth",
     "google-auth-oauthlib",
     # 2.4.* has a bug where waiting for the query can hang indefinitely.
     # https://github.com/pydata/pandas-gbq/issues/343
-    "google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev,!=2.4.*",
+    "google-cloud-bigquery[bqstorage,pandas] >=1.11.1,<3.0.0dev,!=2.4.*",
 ]
 extras = {
     "tqdm": "tqdm>=4.23.0",
diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt
index 7920656a..6c3080dc 100644
--- a/testing/constraints-3.7.txt
+++ b/testing/constraints-3.7.txt
@@ -5,7 +5,7 @@
 #
 # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
 # Then this file should have foo==1.14.0
-db-dtypes==0.3.0
+db-dtypes==0.3.1
 google-auth==1.4.1
 google-auth-oauthlib==0.0.1
 google-cloud-bigquery==1.11.1
diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py
index 4421f3be..f7184024 100644
--- a/tests/system/test_to_gbq.py
+++ b/tests/system/test_to_gbq.py
@@ -188,6 +188,54 @@ def test_series_round_trip(
             {"name": "num_col", "type": "NUMERIC"},
         ],
     ),
+    pytest.param(
+        *DataFrameRoundTripTestCase(
+            input_df=pandas.DataFrame(
+                {
+                    "row_num": [1, 2, 3],
+                    # DATE values outside the pandas range for timestamp
+                    # aren't supported by the db-dtypes package.
+                    # https://github.com/googleapis/python-bigquery-pandas/issues/441
+                    "date_col": [
+                        datetime.date(1, 1, 1),
+                        datetime.date(1970, 1, 1),
+                        datetime.date(9999, 12, 31),
+                    ],
+                    # TODO: DATETIME/TIMESTAMP values outside of the range for
+                    # pandas timestamp require `date_as_object` parameter in
+                    # google-cloud-bigquery versions 1.x and 2.x.
+                    # https://github.com/googleapis/python-bigquery-pandas/issues/365
+                    # "datetime_col": [
+                    #     datetime.datetime(1, 1, 1),
+                    #     datetime.datetime(1970, 1, 1),
+                    #     datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+                    # ],
+                    # "timestamp_col": [
+                    #     datetime.datetime(1, 1, 1, tzinfo=datetime.timezone.utc),
+                    #     datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
+                    #     datetime.datetime(
+                    #         9999,
+                    #         12,
+                    #         31,
+                    #         23,
+                    #         59,
+                    #         59,
+                    #         999999,
+                    #         tzinfo=datetime.timezone.utc,
+                    #     ),
+                    # ],
+                },
+                columns=["row_num", "date_col", "datetime_col", "timestamp_col"],
+            ),
+            table_schema=[
+                {"name": "row_num", "type": "INTEGER"},
+                {"name": "date_col", "type": "DATE"},
+                {"name": "datetime_col", "type": "DATETIME"},
+                {"name": "timestamp_col", "type": "TIMESTAMP"},
+            ],
+        ),
+        id="issue365-extreme-datetimes",
+    ),
 ]
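
Not part of the diff, but for reviewers: a minimal sketch of the fallback behavior the new `errors="ignore"` argument relies on, assuming `db-dtypes >=0.3.1` and a recent pandas are installed. The series below are throwaway examples, not the real `cast_dataframe_for_parquet` inputs.

```python
import datetime

import db_dtypes  # registers the "dbdate" extension dtype with pandas
import pandas

# Dates inside the pandas timestamp range cast cleanly to the extension dtype.
in_range = pandas.Series([datetime.date(1970, 1, 1), datetime.date(2021, 12, 31)])
print(in_range.astype(db_dtypes.DateDtype(), errors="ignore").dtype)  # dbdate

# datetime.date(1, 1, 1) falls outside the range a nanosecond-precision
# timestamp can represent (roughly years 1677-2262), so the cast fails
# internally; errors="ignore" hands back the original object-dtype column
# instead of raising.
# https://github.com/googleapis/python-bigquery-pandas/issues/441
out_of_range = pandas.Series([datetime.date(1, 1, 1), datetime.date(9999, 12, 31)])
print(out_of_range.astype(db_dtypes.DateDtype(), errors="ignore").dtype)  # object
```

Leaving such a column as `object` should still load correctly, since pyarrow can serialize plain `datetime.date` values to a parquet DATE column on its own; the extension dtype is used when it works, as the inline comment in the diff notes.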