Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 2a2d66b

Browse files
committed
feat: rename to_md5 to md5_as_hex
1 parent 23b4b21 commit 2a2d66b

File tree

14 files changed

+15
-15
lines changed

14 files changed

+15
-15
lines changed

data_diff/databases/base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -404,7 +404,7 @@ def render_checksum(self, c: Compiler, elem: Checksum) -> str:
404404
def render_concat(self, c: Compiler, elem: Concat) -> str:
405405
if self._prevent_overflow_when_concat:
406406
items = [
407-
f"{self.compile(c, Code(self.to_md5(self.to_string(self.compile(c, expr)))))}" for expr in elem.exprs
407+
f"{self.compile(c, Code(self.md5_as_hex(self.to_string(self.compile(c, expr)))))}" for expr in elem.exprs
408408
]
409409

410410
# We coalesce because on some DBs (e.g. MySQL) concat('a', NULL) is NULL
@@ -788,7 +788,7 @@ def md5_as_int(self, s: str) -> str:
788788
"Provide SQL for computing md5 and returning an int"
789789

790790
@abstractmethod
791-
def to_md5(self, s: str) -> str:
791+
def md5_as_hex(self, s: str) -> str:
792792
"""Method to calculate MD5"""
793793

794794
@abstractmethod

data_diff/databases/bigquery.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ def parse_table_name(self, name: str) -> DbPath:
134134
def md5_as_int(self, s: str) -> str:
135135
return f"cast(cast( ('0x' || substr(TO_HEX(md5({s})), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS})) as int64) as numeric) - {CHECKSUM_OFFSET}"
136136

137-
def to_md5(self, s: str) -> str:
137+
def md5_as_hex(self, s: str) -> str:
138138
return f"md5({s})"
139139

140140
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/clickhouse.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ def md5_as_int(self, s: str) -> str:
105105
f"reinterpretAsUInt128(reverse(unhex(lowerUTF8(substr(hex(MD5({s})), {substr_idx}))))) - {CHECKSUM_OFFSET}"
106106
)
107107

108-
def to_md5(self, s: str) -> str:
108+
def md5_as_hex(self, s: str) -> str:
109109
return f"hex(MD5({s}))"
110110

111111
def normalize_number(self, value: str, coltype: FractionalType) -> str:

data_diff/databases/databricks.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ def parse_table_name(self, name: str) -> DbPath:
7676
def md5_as_int(self, s: str) -> str:
7777
return f"cast(conv(substr(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 16, 10) as decimal(38, 0)) - {CHECKSUM_OFFSET}"
7878

79-
def to_md5(self, s: str) -> str:
79+
def md5_as_hex(self, s: str) -> str:
8080
return f"md5({s})"
8181

8282
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/duckdb.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def current_timestamp(self) -> str:
100100
def md5_as_int(self, s: str) -> str:
101101
return f"('0x' || SUBSTRING(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS},{CHECKSUM_HEXDIGITS}))::BIGINT - {CHECKSUM_OFFSET}"
102102

103-
def to_md5(self, s: str) -> str:
103+
def md5_as_hex(self, s: str) -> str:
104104
return f"md5({s})"
105105

106106
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/mssql.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ def normalize_number(self, value: str, coltype: NumericType) -> str:
147147
def md5_as_int(self, s: str) -> str:
148148
return f"convert(bigint, convert(varbinary, '0x' + RIGHT(CONVERT(NVARCHAR(32), HashBytes('MD5', {s}), 2), {CHECKSUM_HEXDIGITS}), 1)) - {CHECKSUM_OFFSET}"
149149

150-
def to_md5(self, s: str) -> str:
150+
def md5_as_hex(self, s: str) -> str:
151151
return f"HashBytes('MD5', {s})"
152152

153153

data_diff/databases/mysql.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ def set_timezone_to_utc(self) -> str:
101101
def md5_as_int(self, s: str) -> str:
102102
return f"conv(substring(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 16, 10) - {CHECKSUM_OFFSET}"
103103

104-
def to_md5(self, s: str) -> str:
104+
def md5_as_hex(self, s: str) -> str:
105105
return f"md5({s})"
106106

107107
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/oracle.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ def md5_as_int(self, s: str) -> str:
133133
# TODO: Find a way to use UTL_RAW.CAST_TO_BINARY_INTEGER ?
134134
return f"to_number(substr(standard_hash({s}, 'MD5'), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 'xxxxxxxxxxxxxxx') - {CHECKSUM_OFFSET}"
135135

136-
def to_md5(self, s: str) -> str:
136+
def md5_as_hex(self, s: str) -> str:
137137
return f"standard_hash({s}, 'MD5'"
138138

139139
def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:

data_diff/databases/postgresql.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ def type_repr(self, t) -> str:
9898
def md5_as_int(self, s: str) -> str:
9999
return f"('x' || substring(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}))::bit({_CHECKSUM_BITSIZE})::bigint - {CHECKSUM_OFFSET}"
100100

101-
def to_md5(self, s: str) -> str:
101+
def md5_as_hex(self, s: str) -> str:
102102
return f"md5({s})"
103103

104104
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/presto.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ def current_timestamp(self) -> str:
128128
def md5_as_int(self, s: str) -> str:
129129
return f"cast(from_base(substr(to_hex(md5(to_utf8({s}))), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 16) as decimal(38, 0)) - {CHECKSUM_OFFSET}"
130130

131-
def to_md5(self, s: str) -> str:
131+
def md5_as_hex(self, s: str) -> str:
132132
return f"to_hex(md5(to_utf8({s})))"
133133

134134
def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:

data_diff/databases/redshift.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def type_repr(self, t) -> str:
4848
def md5_as_int(self, s: str) -> str:
4949
return f"strtol(substring(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 16)::decimal(38) - {CHECKSUM_OFFSET}"
5050

51-
def to_md5(self, s: str) -> str:
51+
def md5_as_hex(self, s: str) -> str:
5252
return f"md5({s})"
5353

5454
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/snowflake.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ def type_repr(self, t) -> str:
7676
def md5_as_int(self, s: str) -> str:
7777
return f"BITAND(md5_number_lower64({s}), {CHECKSUM_MASK}) - {CHECKSUM_OFFSET}"
7878

79-
def to_md5(self, s: str) -> str:
79+
def md5_as_hex(self, s: str) -> str:
8080
return f"md5_number_lower64({s})"
8181

8282
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/vertica.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ def current_timestamp(self) -> str:
110110
def md5_as_int(self, s: str) -> str:
111111
return f"CAST(HEX_TO_INTEGER(SUBSTRING(MD5({s}), {1 + MD5_HEXDIGITS - CHECKSUM_HEXDIGITS})) AS NUMERIC(38, 0)) - {CHECKSUM_OFFSET}"
112112

113-
def to_md5(self, s: str) -> str:
113+
def md5_as_hex(self, s: str) -> str:
114114
return f"MD5({s})"
115115

116116
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

tests/test_query.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ def optimizer_hints(self, s: str):
7171
def md5_as_int(self, s: str) -> str:
7272
raise NotImplementedError
7373

74-
def to_md5(self, s: str) -> str:
74+
def md5_as_hex(self, s: str) -> str:
7575
raise NotImplementedError
7676

7777
def normalize_number(self, value: str, coltype: FractionalType) -> str:

0 commit comments

Comments
 (0)