tests.system.test_read_gbq: test_default_dtypes[issue365-extreme-datetimes-False] failed #624

Closed
googleapis/python-db-dtypes-pandas
#180
@flaky-bot

Description

This test failed!

To configure my behavior, see the Flaky Bot documentation.

If I'm commenting on this issue too often, add the flakybot: quiet label and I will stop commenting.


commit: 1dca732
buildURL: Build Status, Sponge
status: failed

Test output
read_gbq = functools.partial(<...>, project_id='precise-truck-742', credentials=<...>)
query = '\nSELECT\n  dates.row_num AS row_num,\n  date_col,\n  datetime_col,\n  timestamp_col\nFROM\n  UNNEST([\n      STRUCT(...dates`.row_num = `datetimes`.row_num\n  AND `dates`.row_num = `timestamps`.row_num\nORDER BY row_num ASC\n            '
expected =    row_num  ...                     timestamp_col
0        1  ...         0001-01-01 00:00:00+00:00
1        2  ...  9999-12-31 23:59:59.999999+00:00
2        3  ...  2262-04-11 23:47:16.854776+00:00

[3 rows x 4 columns]
use_bqstorage_apis = {False, True}, use_bqstorage_api = False

@pytest.mark.parametrize(["use_bqstorage_api"], [(True,), (False,)])
@pytest.mark.parametrize(
    ["query", "expected", "use_bqstorage_apis"],
    [
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  bools.row_num AS row_num,
  bool_col,
  bytes_col,
  date_col,
  datetime_col,
  float_col,
  int64_col,
  numeric_col,
  string_col,
  time_col,
  timestamp_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, TRUE AS bool_col),
      STRUCT(2 AS row_num, FALSE AS bool_col),
      STRUCT(3 AS row_num, TRUE AS bool_col) ]) AS `bools`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('C00010FF' AS BYTES FORMAT 'HEX') AS bytes_col),
      STRUCT(2 AS row_num, CAST('F1AC' AS BYTES FORMAT 'HEX') AS bytes_col),
      STRUCT(3 AS row_num, CAST('FFBADD11' AS BYTES FORMAT 'HEX') AS bytes_col) ]) AS `bytes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATE(1998, 9, 4) AS date_col),
      STRUCT(2 AS row_num, DATE(2011, 10, 1) AS date_col),
      STRUCT(3 AS row_num, DATE(2018, 4, 11) AS date_col) ]) AS `dates`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATETIME('1998-09-04 12:34:56.789101') AS datetime_col),
      STRUCT(2 AS row_num, DATETIME('2011-10-01 00:01:02.345678') AS datetime_col),
      STRUCT(3 AS row_num, DATETIME('2018-04-11 23:59:59.999999') AS datetime_col) ]) AS `datetimes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, 1.125 AS float_col),
      STRUCT(2 AS row_num, -2.375 AS float_col),
      STRUCT(3 AS row_num, 0.0 AS float_col) ]) AS `floats`
INNER JOIN
  UNNEST([
      -- 2 ^ 63 - 1, but in hex to avoid intermediate overflow.
      STRUCT(1 AS row_num, 0x7fffffffffffffff AS int64_col),
      STRUCT(2 AS row_num, -1 AS int64_col),
      -- -2 ^ 63, but in hex to avoid intermediate overflow.
      STRUCT(3 AS row_num, -0x8000000000000000 AS int64_col) ]) AS `ints`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('123.456789' AS NUMERIC) AS numeric_col),
      STRUCT(2 AS row_num, CAST('-123.456789' AS NUMERIC) AS numeric_col),
      STRUCT(3 AS row_num, CAST('999.999999' AS NUMERIC) AS numeric_col) ]) AS `numerics`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, 'abcdefghijklmnopqrstuvwxyz' AS string_col),
      STRUCT(2 AS row_num, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' AS string_col),
      STRUCT(3 AS row_num, 'こんにちは' AS string_col) ]) AS `strings`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('00:00:00.000000' AS TIME) AS time_col),
      STRUCT(2 AS row_num, CAST('09:08:07.654321' AS TIME) AS time_col),
      STRUCT(3 AS row_num, CAST('23:59:59.999999' AS TIME) AS time_col) ]) AS `times`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, TIMESTAMP('1998-09-04 12:34:56.789101') AS timestamp_col),
      STRUCT(2 AS row_num, TIMESTAMP('2011-10-01 00:01:02.345678') AS timestamp_col),
      STRUCT(3 AS row_num, TIMESTAMP('2018-04-11 23:59:59.999999') AS timestamp_col) ]) AS `timestamps`
WHERE
  `bools`.row_num = `dates`.row_num
  AND `bools`.row_num = `bytes`.row_num
  AND `bools`.row_num = `datetimes`.row_num
  AND `bools`.row_num = `floats`.row_num
  AND `bools`.row_num = `ints`.row_num
  AND `bools`.row_num = `numerics`.row_num
  AND `bools`.row_num = `strings`.row_num
  AND `bools`.row_num = `times`.row_num
  AND `bools`.row_num = `timestamps`.row_num
ORDER BY row_num ASC
                """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1, 2, 3], dtype="Int64"),
                        "bool_col": pandas.Series(
                            [True, False, True],
                            dtype="boolean"
                            if FEATURES.pandas_has_boolean_dtype
                            else "bool",
                        ),
                        "bytes_col": [
                            bytes.fromhex("C00010FF"),
                            bytes.fromhex("F1AC"),
                            bytes.fromhex("FFBADD11"),
                        ],
                        "date_col": pandas.Series(
                            [
                                datetime.date(1998, 9, 4),
                                datetime.date(2011, 10, 1),
                                datetime.date(2018, 4, 11),
                            ],
                            dtype=db_dtypes.DateDtype(),
                        ),
                        "datetime_col": pandas.Series(
                            [
                                "1998-09-04 12:34:56.789101",
                                "2011-10-01 00:01:02.345678",
                                "2018-04-11 23:59:59.999999",
                            ],
                            dtype="datetime64[ns]",
                        ),
                        "float_col": [1.125, -2.375, 0.0],
                        "int64_col": pandas.Series(
                            [(2**63) - 1, -1, -(2**63)], dtype="Int64"
                        ),
                        "numeric_col": [
                            decimal.Decimal("123.456789"),
                            decimal.Decimal("-123.456789"),
                            decimal.Decimal("999.999999"),
                        ],
                        "string_col": [
                            "abcdefghijklmnopqrstuvwxyz",
                            "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                            "こんにちは",
                        ],
                        "time_col": pandas.Series(
                            ["00:00:00.000000", "09:08:07.654321", "23:59:59.999999"],
                            dtype=db_dtypes.TimeDtype(),
                        ),
                        "timestamp_col": pandas.Series(
                            [
                                "1998-09-04 12:34:56.789101",
                                "2011-10-01 00:01:02.345678",
                                "2018-04-11 23:59:59.999999",
                            ],
                            dtype="datetime64[ns]",
                        ).dt.tz_localize(datetime.timezone.utc),
                    }
                ),
            ),
            id="scalar-types-nonnull-normal-range",
        ),
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  bools.row_num AS row_num,
  bool_col,
  bytes_col,
  date_col,
  datetime_col,
  float_col,
  int64_col,
  numeric_col,
  string_col,
  time_col,
  timestamp_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, TRUE AS bool_col),
      STRUCT(2 AS row_num, FALSE AS bool_col),
      STRUCT(3 AS row_num, NULL AS bool_col) ]) AS `bools`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, NULL AS bytes_col),
      STRUCT(2 AS row_num, CAST('F1AC' AS BYTES FORMAT 'HEX') AS bytes_col),
      STRUCT(3 AS row_num, CAST('' AS BYTES FORMAT 'HEX') AS bytes_col) ]) AS `bytes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATE(1970, 1, 1) AS date_col),
      STRUCT(2 AS row_num, NULL AS date_col),
      STRUCT(3 AS row_num, DATE(2018, 4, 11) AS date_col) ]) AS `dates`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATETIME('1970-01-01 00:00:00.000000') AS datetime_col),
      STRUCT(2 AS row_num, DATETIME('2011-10-01 00:01:02.345678') AS datetime_col),
      STRUCT(3 AS row_num, NULL AS datetime_col) ]) AS `datetimes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, NULL AS float_col),
      STRUCT(2 AS row_num, -2.375 AS float_col),
      STRUCT(3 AS row_num, 0.0 AS float_col) ]) AS `floats`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, -1 AS int64_col),
      STRUCT(2 AS row_num, NULL AS int64_col),
      STRUCT(3 AS row_num, 0 AS int64_col) ]) AS `int64s`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('123.456789' AS NUMERIC) AS numeric_col),
      STRUCT(2 AS row_num, NULL AS numeric_col),
      STRUCT(3 AS row_num, CAST('999.999999' AS NUMERIC) AS numeric_col) ]) AS `numerics`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, '' AS string_col),
      STRUCT(2 AS row_num, 'こんにちは' AS string_col),
      STRUCT(3 AS row_num, NULL AS string_col) ]) AS `strings`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, NULL AS time_col),
      STRUCT(2 AS row_num, CAST('00:00:00.000000' AS TIME) AS time_col),
      STRUCT(3 AS row_num, CAST('23:59:59.999999' AS TIME) AS time_col) ]) AS `times`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, TIMESTAMP('1970-01-01 00:00:00.000000') AS timestamp_col),
      STRUCT(2 AS row_num, NULL AS timestamp_col),
      STRUCT(3 AS row_num, TIMESTAMP('2018-04-11 23:59:59.999999') AS timestamp_col) ]) AS `timestamps`
WHERE
  `bools`.row_num = `dates`.row_num
  AND `bools`.row_num = `bytes`.row_num
  AND `bools`.row_num = `datetimes`.row_num
  AND `bools`.row_num = `floats`.row_num
  AND `bools`.row_num = `int64s`.row_num
  AND `bools`.row_num = `numerics`.row_num
  AND `bools`.row_num = `strings`.row_num
  AND `bools`.row_num = `times`.row_num
  AND `bools`.row_num = `timestamps`.row_num
ORDER BY row_num ASC
            """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1, 2, 3], dtype="Int64"),
                        "bool_col": pandas.Series(
                            [True, False, None],
                            dtype="boolean"
                            if FEATURES.pandas_has_boolean_dtype
                            else "object",
                        ),
                        "bytes_col": [None, bytes.fromhex("F1AC"), b""],
                        "date_col": pandas.Series(
                            [
                                datetime.date(1970, 1, 1),
                                None,
                                datetime.date(2018, 4, 11),
                            ],
                            dtype=db_dtypes.DateDtype(),
                        ),
                        "datetime_col": pandas.Series(
                            [
                                "1970-01-01 00:00:00.000000",
                                "2011-10-01 00:01:02.345678",
                                None,
                            ],
                            dtype="datetime64[ns]",
                        ),
                        "float_col": [None, -2.375, 0.0],
                        "int64_col": pandas.Series([-1, None, 0], dtype="Int64"),
                        "numeric_col": [
                            decimal.Decimal("123.456789"),
                            None,
                            decimal.Decimal("999.999999"),
                        ],
                        "string_col": ["", "こんにちは", None],
                        "time_col": pandas.Series(
                            [None, "00:00:00", "23:59:59.999999"],
                            dtype=db_dtypes.TimeDtype(),
                        ),
                        "timestamp_col": pandas.Series(
                            [
                                "1970-01-01 00:00:00.000000",
                                None,
                                "2018-04-11 23:59:59.999999",
                            ],
                            dtype="datetime64[ns]",
                        ).dt.tz_localize(datetime.timezone.utc),
                    }
                ),
            ),
            id="scalar-types-nullable-normal-range",
        ),
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  bools.row_num AS row_num,
  bool_col,
  bytes_col,
  date_col,
  datetime_col,
  float_col,
  int64_col,
  numeric_col,
  string_col,
  time_col,
  timestamp_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS BOOL) AS bool_col) ]) AS `bools`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS BYTES) AS bytes_col) ]) AS `bytes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS DATE) AS date_col) ]) AS `dates`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS DATETIME) AS datetime_col) ]) AS `datetimes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS FLOAT64) AS float_col) ]) AS `floats`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS INT64) AS int64_col) ]) AS `int64s`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS NUMERIC) AS numeric_col) ]) AS `numerics`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS STRING) AS string_col) ]) AS `strings`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS TIME) AS time_col) ]) AS `times`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS TIMESTAMP) AS timestamp_col) ]) AS `timestamps`
WHERE
  `bools`.row_num = `dates`.row_num
  AND `bools`.row_num = `bytes`.row_num
  AND `bools`.row_num = `datetimes`.row_num
  AND `bools`.row_num = `floats`.row_num
  AND `bools`.row_num = `int64s`.row_num
  AND `bools`.row_num = `numerics`.row_num
  AND `bools`.row_num = `strings`.row_num
  AND `bools`.row_num = `times`.row_num
  AND `bools`.row_num = `timestamps`.row_num
ORDER BY row_num ASC
            """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1], dtype="Int64"),
                        "bool_col": pandas.Series(
                            [None],
                            dtype="boolean"
                            if FEATURES.pandas_has_boolean_dtype
                            else "object",
                        ),
                        "bytes_col": [None],
                        "date_col": pandas.Series(
                            [None],
                            dtype=db_dtypes.DateDtype(),
                        ),
                        "datetime_col": pandas.Series(
                            [None],
                            dtype="datetime64[ns]",
                        ),
                        "float_col": pandas.Series([None], dtype="float64"),
                        "int64_col": pandas.Series([None], dtype="Int64"),
                        "numeric_col": [None],
                        "string_col": [None],
                        "time_col": pandas.Series(
                            [None],
                            dtype=db_dtypes.TimeDtype(),
                        ),
                        "timestamp_col": pandas.Series(
                            [None],
                            dtype="datetime64[ns]",
                        ).dt.tz_localize(datetime.timezone.utc),
                    }
                ),
            ),
            id="scalar-types-null",
        ),
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  bignumerics.row_num AS row_num,
  bignumeric_col,
  nullable_col,
  null_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, CAST('123456789.123456789' AS BIGNUMERIC) AS bignumeric_col),
      STRUCT(2 AS row_num, CAST('-123456789.123456789' AS BIGNUMERIC) AS bignumeric_col),
      STRUCT(3 AS row_num, CAST('987654321.987654321' AS BIGNUMERIC) AS bignumeric_col) ]) AS `bignumerics`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('123456789.123456789' AS BIGNUMERIC) AS nullable_col),
      STRUCT(2 AS row_num, NULL AS nullable_col),
      STRUCT(3 AS row_num, CAST('987654321.987654321' AS BIGNUMERIC) AS nullable_col) ]) AS `nullables`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS BIGNUMERIC) AS null_col),
      STRUCT(2 AS row_num, CAST(NULL AS BIGNUMERIC) AS null_col),
      STRUCT(3 AS row_num, CAST(NULL AS BIGNUMERIC) AS null_col) ]) AS `nulls`
WHERE
  `bignumerics`.row_num = `nullables`.row_num
  AND `bignumerics`.row_num = `nulls`.row_num
ORDER BY row_num ASC
            """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1, 2, 3], dtype="Int64"),
                        # TODO: Support a special (nullable) dtype for decimal data.
                        # https://github.com/googleapis/python-db-dtypes-pandas/issues/49
                        "bignumeric_col": [
                            decimal.Decimal("123456789.123456789"),
                            decimal.Decimal("-123456789.123456789"),
                            decimal.Decimal("987654321.987654321"),
                        ],
                        "nullable_col": [
                            decimal.Decimal("123456789.123456789"),
                            None,
                            decimal.Decimal("987654321.987654321"),
                        ],
                        "null_col": [None, None, None],
                    }
                ),
            ),
            id="bignumeric-normal-range",
            marks=pytest.mark.skipif(
                not FEATURES.bigquery_has_bignumeric,
                reason="BIGNUMERIC not supported in this version of google-cloud-bigquery",
            ),
        ),
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  dates.row_num AS row_num,
  date_col,
  datetime_col,
  timestamp_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, DATE(1, 1, 1) AS date_col),
      STRUCT(2 AS row_num, DATE(9999, 12, 31) AS date_col),
      STRUCT(3 AS row_num, DATE(2262, 4, 12) AS date_col) ]) AS `dates`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATETIME('0001-01-01 00:00:00.000000') AS datetime_col),
      STRUCT(2 AS row_num, DATETIME('9999-12-31 23:59:59.999999') AS datetime_col),
      STRUCT(3 AS row_num, DATETIME('2262-04-11 23:47:16.854776') AS datetime_col) ]) AS `datetimes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, TIMESTAMP('0001-01-01 00:00:00.000000') AS timestamp_col),
      STRUCT(2 AS row_num, TIMESTAMP('9999-12-31 23:59:59.999999') AS timestamp_col),
      STRUCT(3 AS row_num, TIMESTAMP('2262-04-11 23:47:16.854776') AS timestamp_col) ]) AS `timestamps`
WHERE
  `dates`.row_num = `datetimes`.row_num
  AND `dates`.row_num = `timestamps`.row_num
ORDER BY row_num ASC
            """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1, 2, 3], dtype="Int64"),
                        "date_col": pandas.Series(
                            [
                                datetime.date(1, 1, 1),
                                datetime.date(9999, 12, 31),
                                datetime.date(2262, 4, 12),
                            ],
                            dtype="object",
                        ),
                        "datetime_col": pandas.Series(
                            [
                                datetime.datetime(1, 1, 1, 0, 0, 0, 0),
                                datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
                                # One microsecond more than pandas.Timestamp.max.
                                datetime.datetime(2262, 4, 11, 23, 47, 16, 854776),
                            ],
                            dtype="object",
                        ),
                        "timestamp_col": pandas.Series(
                            [
                                datetime.datetime(
                                    1, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc
                                ),
                                datetime.datetime(
                                    9999,
                                    12,
                                    31,
                                    23,
                                    59,
                                    59,
                                    999999,
                                    tzinfo=datetime.timezone.utc,
                                ),
                                # One microsecond more than pandas.Timestamp.max.
                                datetime.datetime(
                                    2262,
                                    4,
                                    11,
                                    23,
                                    47,
                                    16,
                                    854776,
                                    tzinfo=datetime.timezone.utc,
                                ),
                            ],
                            dtype="object",
                        ),
                    }
                ),
                use_bqstorage_apis={True, False}
                if FEATURES.bigquery_has_accurate_timestamp
                else {True},
            ),
            id="issue365-extreme-datetimes",
        ),
    ],
)
def test_default_dtypes(
    read_gbq, query, expected, use_bqstorage_apis, use_bqstorage_api
):
    if use_bqstorage_api not in use_bqstorage_apis:
        pytest.skip(f"use_bqstorage_api={use_bqstorage_api} not supported.")
    # the parameter useQueryCache=False is used in the following function call
    # to avoid a failing test due to cached data that may be out of order.
    result = read_gbq(
        query,
        use_bqstorage_api=use_bqstorage_api,
        configuration={"query": {"useQueryCache": False}},
    )
    pandas.testing.assert_frame_equal(result, expected)
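
For context, the failing parametrization above maps onto a plain pandas-gbq call along the following lines. This is only a sketch: the read_gbq fixture appears to wrap pandas_gbq.read_gbq via functools.partial (see the captured fixture repr above), and the project ID and query placeholder below are illustrative, not the test project or literal SQL.

import pandas_gbq

# Placeholder for the "issue365-extreme-datetimes" SQL shown in the last
# pytest.param above.
query = """SELECT ..."""

# Sketch of the failing case: BigQuery Storage API disabled and the query
# cache turned off, matching use_bqstorage_api=False in this parametrization.
df = pandas_gbq.read_gbq(
    query,
    project_id="your-project-id",  # placeholder; the fixture supplies the real project and credentials
    use_bqstorage_api=False,
    configuration={"query": {"useQueryCache": False}},
)
print(df.dtypes)  # the reported failure: date_col comes back as dbdate instead of object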

E AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="date_col") are different
E
E Attribute "dtype" are different
E [left]: dbdate
E [right]: object

tests/system/test_read_gbq.py:557: AssertionError
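
The dtype mismatch itself is easy to see without touching BigQuery: the dates in this case sit outside the range that pandas' nanosecond-based datetime64[ns] can represent, which is why the expected frame was written with plain object columns for date_col, datetime_col, and timestamp_col. The received frame coming back as dbdate suggests the installed db_dtypes version handled these extreme values differently than the expectation assumes. A minimal standalone sketch of the boundary involved (illustrative values, not captured from the failing run):

import datetime

import pandas

# pandas.Timestamp is backed by 64-bit nanoseconds since the epoch, so its
# maximum is 2262-04-11 23:47:16.854775807; the third row's DATETIME value,
# 2262-04-11 23:47:16.854776, is deliberately one microsecond past it, and
# DATE(1, 1, 1) / DATE(9999, 12, 31) are far outside the range entirely.
print(pandas.Timestamp.max)

try:
    pandas.to_datetime(["0001-01-01 00:00:00", "9999-12-31 23:59:59.999999"])
except pandas.errors.OutOfBoundsDatetime as exc:
    print(f"not representable as datetime64[ns]: {exc}")

# The expected frame therefore keeps the extreme dates as plain Python objects...
expected_dates = pandas.Series(
    [datetime.date(1, 1, 1), datetime.date(9999, 12, 31), datetime.date(2262, 4, 12)],
    dtype="object",
)
print(expected_dates.dtype)  # object

# ...while the received frame carried db_dtypes.DateDtype() ("dbdate"), which is
# the left/right dtype difference assert_frame_equal reports above.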

Metadata

Labels

api: bigquery - Issues related to the googleapis/python-bigquery-pandas API.
flakybot: flaky - Tells the Flaky Bot not to close or comment on this issue.
flakybot: issue - An issue filed by the Flaky Bot. Should not be added manually.
priority: p1 - Important issue which blocks shipping the next release. Will be fixed prior to next release.
type: bug - Error or flaw in code with unintended results or allowing sub-optimal usage patterns.
