Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 32a7ad5

Browse files
author
Sergey Vasilyev
committed
Relax the checking of cross-type column matching on demand
Also ensure that both columns have the same set of attributes and follow the same protocol before actually using them — i.e. do not simply assume this as a given.
1 parent f8600dd commit 32a7ad5

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

data_diff/hashdiff_tables.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import logging
44
from collections import defaultdict
55
from typing import Iterator
6-
from operator import attrgetter
76

87
import attrs
98

@@ -82,7 +81,7 @@ def __attrs_post_init__(self):
8281
if self.bisection_factor < 2:
8382
raise ValueError("Must have at least two segments per iteration (i.e. bisection_factor >= 2)")
8483

85-
def _validate_and_adjust_columns(self, table1, table2):
84+
def _validate_and_adjust_columns(self, table1, table2, *, strict: bool = True):
8685
for c1, c2 in safezip(table1.relevant_columns, table2.relevant_columns):
8786
if c1 not in table1._schema:
8887
raise ValueError(f"Column '{c1}' not found in schema for table {table1}")
@@ -92,23 +91,23 @@ def _validate_and_adjust_columns(self, table1, table2):
9291
# Update schemas to minimal mutual precision
9392
col1 = table1._schema[c1]
9493
col2 = table2._schema[c2]
95-
if isinstance(col1, PrecisionType):
96-
if not isinstance(col2, PrecisionType):
94+
if isinstance(col1, PrecisionType) and isinstance(col2, PrecisionType):
95+
if strict and not isinstance(col2, PrecisionType):
9796
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
9897

99-
lowest = min(col1, col2, key=attrgetter("precision"))
98+
lowest = min(col1, col2, key=lambda col: col.precision)
10099

101100
if col1.precision != col2.precision:
102101
logger.warning(f"Using reduced precision {lowest} for column '{c1}'. Types={col1}, {col2}")
103102

104103
table1._schema[c1] = attrs.evolve(col1, precision=lowest.precision, rounds=lowest.rounds)
105104
table2._schema[c2] = attrs.evolve(col2, precision=lowest.precision, rounds=lowest.rounds)
106105

107-
elif isinstance(col1, (NumericType, Boolean)):
108-
if not isinstance(col2, (NumericType, Boolean)):
106+
elif isinstance(col1, (NumericType, Boolean)) and isinstance(col2, (NumericType, Boolean)):
107+
if strict and not isinstance(col2, (NumericType, Boolean)):
109108
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
110109

111-
lowest = min(col1, col2, key=attrgetter("precision"))
110+
lowest = min(col1, col2, key=lambda col: col.precision)
112111

113112
if col1.precision != col2.precision:
114113
logger.warning(f"Using reduced precision {lowest} for column '{c1}'. Types={col1}, {col2}")
@@ -119,11 +118,11 @@ def _validate_and_adjust_columns(self, table1, table2):
119118
table2._schema[c2] = attrs.evolve(col2, precision=lowest.precision)
120119

121120
elif isinstance(col1, ColType_UUID):
122-
if not isinstance(col2, ColType_UUID):
121+
if strict and not isinstance(col2, ColType_UUID):
123122
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
124123

125124
elif isinstance(col1, StringType):
126-
if not isinstance(col2, StringType):
125+
if strict and not isinstance(col2, StringType):
127126
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
128127

129128
for t in [table1, table2]:

0 commit comments

Comments
 (0)