|
1 |
| -from typing import ClassVar, List, Dict, Type |
| 1 | +import logging |
| 2 | +from typing import ClassVar, List, Dict, Optional, Type |
2 | 3 |
|
3 | 4 | import attrs
|
4 | 5 |
|
|
10 | 11 | FractionalType,
|
11 | 12 | DbPath,
|
12 | 13 | TimestampTZ,
|
| 14 | + UnknownColType, |
13 | 15 | )
|
14 | 16 | from data_diff.databases.postgresql import (
|
15 | 17 | BaseDialect,
|
|
21 | 23 | PostgresqlDialect,
|
22 | 24 | )
|
23 | 25 |
|
| 26 | +logger = logging.getLogger("redshift") |
| 27 | + |
24 | 28 |
|
25 | 29 | @attrs.define(frozen=False)
|
26 | 30 | class Dialect(PostgresqlDialect):
|
@@ -183,3 +187,29 @@ def _normalize_table_path(self, path: DbPath) -> DbPath:
|
183 | 187 | raise ValueError(
|
184 | 188 | f"{self.name}: Bad table path for {self}: '{'.'.join(path)}'. Expected format: table, schema.table, or database.schema.table"
|
185 | 189 | )
|
| 190 | + |
def _refine_coltypes(
    self, table_path: DbPath, col_dict: Dict[str, ColType], where: Optional[str] = None, sample_size=64
):
    """Refine column types, demoting temporal columns that hold out-of-range values.

    The parent refinement runs first. Afterwards every column still typed as a
    ``TemporalType`` is probed: values in a timestamp column in Redshift can
    technically be "out of range", which causes issues when treating them as
    valid timestamps. A column whose min/max fail the strict conversion is
    replaced in ``col_dict`` by an ``UnknownColType``.

    Args:
        table_path: Path components of the table being inspected.
        col_dict: Mapping of column name to column type; updated in place.
        where: Optional raw SQL filter. It is forwarded to the parent and also
            applied to the range probe, so rows outside the compared segment
            cannot demote a column.
        sample_size: Forwarded unchanged to the parent implementation.

    Returns:
        None. Results are communicated by mutating ``col_dict``.
    """
    super()._refine_coltypes(table_path, col_dict, where, sample_size)

    def quote(identifier: str) -> str:
        # Double-quote identifiers so reserved words or unusual names do not
        # produce a syntax error that would be mistaken for bad data.
        return '"' + identifier.replace('"', '""') + '"'

    # Materialise the names up front: col_dict is modified inside the loop.
    temporal_columns = [name for name, col_type in col_dict.items() if isinstance(col_type, TemporalType)]
    if not temporal_columns:
        return

    source = ".".join(quote(part) for part in table_path)
    if where:
        source += f" where ({where})"

    for column_name in temporal_columns:
        column = quote(column_name)
        try:
            # Only the extremes need checking; TRUE enables strict mode so an
            # out-of-range value raises instead of being silently accepted.
            self.query(
                f"select to_timestamp(max({column}),'YYYY-mm-dd HH24:MI:SS.US', TRUE), "
                f"to_timestamp(min({column}),'YYYY-mm-dd HH24:MI:SS.US', TRUE) from {source}"
            )
        except Exception:
            # The driver's exception type is not visible here, so the catch stays
            # broad; keep the real cause available for troubleshooting.
            logger.debug("Range check failed for temporal column %s", column_name, exc_info=True)
            # Presumably the failed statement leaves the transaction aborted;
            # roll back so later queries on this connection can run.
            self.query("rollback")
            logger.warning("Temporal column %s has out of range values, falling back to text.", column_name)
            col_dict[column_name] = UnknownColType("Temporal type contains out of range values.")
0 commit comments