Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 444d0a9

Browse files
committed
skip normalizing problematic timestamp values
#786
1 parent 283bbac commit 444d0a9

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

data_diff/databases/redshift.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,15 @@ def md5_as_hex(self, s: str) -> str:
5454
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
5555
if coltype.rounds:
5656
timestamp = f"{value}::timestamp(6)"
57+
58+
# redshift allows some problematic timestamp values, don't normalize those (epoch calcs fail)
59+
case_start = (
60+
f"case when "
61+
+ f"({timestamp} < '1901-01-01 00:00'::timestamp(6) or {timestamp} >= '2038-01-01'::timestamp(6))"
62+
+ f"then {timestamp}::varchar else"
63+
)
64+
case_end = f"end"
65+
5766
# Get seconds since epoch. Redshift doesn't support milli- or micro-seconds.
5867
secs = f"timestamp 'epoch' + round(extract(epoch from {timestamp})::decimal(38)"
5968
# Get the milliseconds from timestamp.
@@ -65,8 +74,14 @@ def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
6574
timestamp6 = (
6675
f"to_char({epoch}, -6+{coltype.precision}) * interval '0.000001 seconds', 'YYYY-mm-dd HH24:MI:SS.US')"
6776
)
77+
padded = self._add_padding(coltype, timestamp6)
78+
return f"{case_start} {padded} {case_end}"
6879
else:
6980
timestamp6 = f"to_char({value}::timestamp(6), 'YYYY-mm-dd HH24:MI:SS.US')"
81+
padded = self._add_padding(coltype, timestamp6)
82+
return padded
83+
84+
def _add_padding(self, coltype: TemporalType, timestamp6: str):
    """Wrap a formatted-timestamp SQL expression in a truncate-then-pad expression.

    The returned SQL text first keeps the leftmost
    ``TIMESTAMP_PRECISION_POS + coltype.precision`` characters of
    ``timestamp6`` via ``LEFT``, then right-pads the result with ``'0'``
    up to ``TIMESTAMP_PRECISION_POS + 6`` characters via ``RPAD``.

    Args:
        coltype: Column type descriptor; only its ``precision`` attribute
            is read here.
        timestamp6: SQL expression text to be truncated and padded.
            NOTE(review): callers appear to pass a ``to_char(...)``
            expression rendered with microseconds -- confirm.

    Returns:
        The composed ``RPAD(LEFT(...), ...)`` SQL expression as a string.
    """
    # NOTE(review): TIMESTAMP_PRECISION_POS is defined outside this block;
    # presumably it is the offset at which fractional digits begin -- verify.
    kept_width = TIMESTAMP_PRECISION_POS + coltype.precision
    padded_width = TIMESTAMP_PRECISION_POS + 6
    return f"RPAD(LEFT({timestamp6}, {kept_width}), {padded_width}, '0')"

0 commit comments

Comments
 (0)