Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 1bcc12c

Browse files
committed
out of range temporal cols as UnknownColType
issue 786 #786
1 parent 283bbac commit 1bcc12c

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

data_diff/databases/redshift.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from typing import ClassVar, List, Dict, Type
1+
import logging
2+
from typing import ClassVar, List, Dict, Optional, Type
23

34
import attrs
45

@@ -10,6 +11,7 @@
1011
FractionalType,
1112
DbPath,
1213
TimestampTZ,
14+
UnknownColType,
1315
)
1416
from data_diff.databases.postgresql import (
1517
BaseDialect,
@@ -21,6 +23,8 @@
2123
PostgresqlDialect,
2224
)
2325

26+
# Module-level logger; _refine_coltypes uses it to warn when a temporal column is demoted to UnknownColType.
logger = logging.getLogger("redshift")
27+
2428

2529
@attrs.define(frozen=False)
2630
class Dialect(PostgresqlDialect):
@@ -183,3 +187,29 @@ def _normalize_table_path(self, path: DbPath) -> DbPath:
183187
raise ValueError(
184188
f"{self.name}: Bad table path for {self}: '{'.'.join(path)}'. Expected format: table, schema.table, or database.schema.table"
185189
)
190+
191+
def _refine_coltypes(
    self, table_path: DbPath, col_dict: Dict[str, ColType], where: Optional[str] = None, sample_size=64
):
    """Refine column types, demoting out-of-range temporal columns to ``UnknownColType``.

    The parent refinement runs first. Afterwards every column still typed as
    ``TemporalType`` is probed by passing its min and max through a strict
    ``to_timestamp``. If the probe query raises, the column's entry in
    ``col_dict`` is replaced in place with an ``UnknownColType``.

    Args:
        table_path: Path components of the table; joined with "." in the probe query.
        col_dict: Mapping of column name to column type. Mutated in place.
        where: Optional filter, forwarded unchanged to the parent refinement.
        sample_size: Forwarded unchanged to the parent refinement.

    Returns:
        None. Results are communicated by mutating ``col_dict``.
    """
    super()._refine_coltypes(table_path, col_dict, where, sample_size)

    # A list (not a set) keeps probing and log output in col_dict order.
    temporal_columns = [name for name, col_type in col_dict.items() if isinstance(col_type, TemporalType)]
    if not temporal_columns:
        return

    table_name = ".".join(table_path)

    # Values in a Redshift timestamp column can technically be "out of range".
    # Treating them as valid timestamps causes issues, so a column with
    # out-of-range values must not stay a TemporalType.
    #
    # NOTE(review): `where` is not applied to the probe, so rows outside the
    # filter can still demote a column; column and table identifiers are also
    # interpolated unquoted. Confirm whether either should change.
    for column_name in temporal_columns:
        # The trailing TRUE argument of to_timestamp enables strict mode.
        # Only the min and max are checked.
        probe_sql = (
            f"select to_timestamp(max({column_name}),'YYYY-mm-dd HH24:MI:SS.US', TRUE), "
            f"to_timestamp(min({column_name}),'YYYY-mm-dd HH24:MI:SS.US', TRUE) from {table_name}"
        )
        try:
            self.query(probe_sql)
        except Exception:
            # Presumably the failed statement leaves the transaction aborted;
            # roll back so later queries can run.
            self.query("rollback")
            logger.warning("Temporal column %s has out of range values, falling back to text.", column_name)
            # Any failure is interpreted as "out of range"; keep the real
            # error available for diagnosis instead of discarding it.
            logger.debug("Strict to_timestamp probe failed for column %s", column_name, exc_info=True)
            col_dict[column_name] = UnknownColType("Temporal type contains out of range values.")

0 commit comments

Comments
 (0)