Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit b9d526f

Browse files
committed
feat: rename to_md5 to md5_as_hex
1 parent 6812670 commit b9d526f

File tree

14 files changed: 15 additions and 15 deletions.

data_diff/databases/base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -403,7 +403,7 @@ def render_checksum(self, c: Compiler, elem: Checksum) -> str:
403403
def render_concat(self, c: Compiler, elem: Concat) -> str:
404404
if self._prevent_overflow_when_concat:
405405
items = [
406-
f"{self.compile(c, Code(self.to_md5(self.to_string(self.compile(c, expr)))))}" for expr in elem.exprs
406+
f"{self.compile(c, Code(self.md5_as_hex(self.to_string(self.compile(c, expr)))))}" for expr in elem.exprs
407407
]
408408

409409
# We coalesce because on some DBs (e.g. MySQL) concat('a', NULL) is NULL
@@ -787,7 +787,7 @@ def md5_as_int(self, s: str) -> str:
787787
"Provide SQL for computing md5 and returning an int"
788788

789789
@abstractmethod
790-
def to_md5(self, s: str) -> str:
790+
def md5_as_hex(self, s: str) -> str:
791791
"""Method to calculate MD5"""
792792

793793
@abstractmethod

data_diff/databases/bigquery.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ def parse_table_name(self, name: str) -> DbPath:
134134
def md5_as_int(self, s: str) -> str:
135135
return f"cast(cast( ('0x' || substr(TO_HEX(md5({s})), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS})) as int64) as numeric) - {CHECKSUM_OFFSET}"
136136

137-
def to_md5(self, s: str) -> str:
137+
def md5_as_hex(self, s: str) -> str:
138138
return f"md5({s})"
139139

140140
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/clickhouse.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ def md5_as_int(self, s: str) -> str:
105105
f"reinterpretAsUInt128(reverse(unhex(lowerUTF8(substr(hex(MD5({s})), {substr_idx}))))) - {CHECKSUM_OFFSET}"
106106
)
107107

108-
def to_md5(self, s: str) -> str:
108+
def md5_as_hex(self, s: str) -> str:
109109
return f"hex(MD5({s}))"
110110

111111
def normalize_number(self, value: str, coltype: FractionalType) -> str:

data_diff/databases/databricks.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ def parse_table_name(self, name: str) -> DbPath:
8282
def md5_as_int(self, s: str) -> str:
8383
return f"cast(conv(substr(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 16, 10) as decimal(38, 0)) - {CHECKSUM_OFFSET}"
8484

85-
def to_md5(self, s: str) -> str:
85+
def md5_as_hex(self, s: str) -> str:
8686
return f"md5({s})"
8787

8888
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/duckdb.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def current_timestamp(self) -> str:
100100
def md5_as_int(self, s: str) -> str:
101101
return f"('0x' || SUBSTRING(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS},{CHECKSUM_HEXDIGITS}))::BIGINT - {CHECKSUM_OFFSET}"
102102

103-
def to_md5(self, s: str) -> str:
103+
def md5_as_hex(self, s: str) -> str:
104104
return f"md5({s})"
105105

106106
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/mssql.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ def normalize_number(self, value: str, coltype: NumericType) -> str:
151151
def md5_as_int(self, s: str) -> str:
152152
return f"convert(bigint, convert(varbinary, '0x' + RIGHT(CONVERT(NVARCHAR(32), HashBytes('MD5', {s}), 2), {CHECKSUM_HEXDIGITS}), 1)) - {CHECKSUM_OFFSET}"
153153

154-
def to_md5(self, s: str) -> str:
154+
def md5_as_hex(self, s: str) -> str:
155155
return f"HashBytes('MD5', {s})"
156156

157157

data_diff/databases/mysql.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ def set_timezone_to_utc(self) -> str:
101101
def md5_as_int(self, s: str) -> str:
102102
return f"conv(substring(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 16, 10) - {CHECKSUM_OFFSET}"
103103

104-
def to_md5(self, s: str) -> str:
104+
def md5_as_hex(self, s: str) -> str:
105105
return f"md5({s})"
106106

107107
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/oracle.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ def md5_as_int(self, s: str) -> str:
137137
# TODO: Find a way to use UTL_RAW.CAST_TO_BINARY_INTEGER ?
138138
return f"to_number(substr(standard_hash({s}, 'MD5'), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 'xxxxxxxxxxxxxxx') - {CHECKSUM_OFFSET}"
139139

140-
def to_md5(self, s: str) -> str:
140+
def md5_as_hex(self, s: str) -> str:
141141
return f"standard_hash({s}, 'MD5'"
142142

143143
def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:

data_diff/databases/postgresql.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ def type_repr(self, t) -> str:
9898
def md5_as_int(self, s: str) -> str:
9999
return f"('x' || substring(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}))::bit({_CHECKSUM_BITSIZE})::bigint - {CHECKSUM_OFFSET}"
100100

101-
def to_md5(self, s: str) -> str:
101+
def md5_as_hex(self, s: str) -> str:
102102
return f"md5({s})"
103103

104104
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/presto.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ def current_timestamp(self) -> str:
128128
def md5_as_int(self, s: str) -> str:
129129
return f"cast(from_base(substr(to_hex(md5(to_utf8({s}))), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 16) as decimal(38, 0)) - {CHECKSUM_OFFSET}"
130130

131-
def to_md5(self, s: str) -> str:
131+
def md5_as_hex(self, s: str) -> str:
132132
return f"to_hex(md5(to_utf8({s})))"
133133

134134
def normalize_uuid(self, value: str, coltype: ColType_UUID) -> str:

data_diff/databases/redshift.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def type_repr(self, t) -> str:
4848
def md5_as_int(self, s: str) -> str:
4949
return f"strtol(substring(md5({s}), {1+MD5_HEXDIGITS-CHECKSUM_HEXDIGITS}), 16)::decimal(38) - {CHECKSUM_OFFSET}"
5050

51-
def to_md5(self, s: str) -> str:
51+
def md5_as_hex(self, s: str) -> str:
5252
return f"md5({s})"
5353

5454
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/snowflake.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ def type_repr(self, t) -> str:
7676
def md5_as_int(self, s: str) -> str:
7777
return f"BITAND(md5_number_lower64({s}), {CHECKSUM_MASK}) - {CHECKSUM_OFFSET}"
7878

79-
def to_md5(self, s: str) -> str:
79+
def md5_as_hex(self, s: str) -> str:
8080
return f"md5_number_lower64({s})"
8181

8282
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

data_diff/databases/vertica.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ def current_timestamp(self) -> str:
110110
def md5_as_int(self, s: str) -> str:
111111
return f"CAST(HEX_TO_INTEGER(SUBSTRING(MD5({s}), {1 + MD5_HEXDIGITS - CHECKSUM_HEXDIGITS})) AS NUMERIC(38, 0)) - {CHECKSUM_OFFSET}"
112112

113-
def to_md5(self, s: str) -> str:
113+
def md5_as_hex(self, s: str) -> str:
114114
return f"MD5({s})"
115115

116116
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:

tests/test_query.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ def optimizer_hints(self, s: str):
7676
def md5_as_int(self, s: str) -> str:
7777
raise NotImplementedError
7878

79-
def to_md5(self, s: str) -> str:
79+
def md5_as_hex(self, s: str) -> str:
8080
raise NotImplementedError
8181

8282
def normalize_number(self, value: str, coltype: FractionalType) -> str:

0 commit comments

Comments
 (0)