Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit d268ff7

Browse files
author
Sergey Vasilyev
committed
Relax the checking of cross-type column matching on demand
Also ensure that both columns have the same set of attributes and follow the same protocol before actually relying on them — i.e. do not simply assume it as a given.
1 parent 5153196 commit d268ff7

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

data_diff/hashdiff_tables.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
import logging
44
from collections import defaultdict
55
from typing import Iterator
6-
from operator import attrgetter
76

87
import attrs
98

@@ -83,7 +82,7 @@ def __attrs_post_init__(self):
8382
if self.bisection_factor < 2:
8483
raise ValueError("Must have at least two segments per iteration (i.e. bisection_factor >= 2)")
8584

86-
def _validate_and_adjust_columns(self, table1, table2):
85+
def _validate_and_adjust_columns(self, table1, table2, *, strict: bool = True):
8786
for c1, c2 in safezip(table1.relevant_columns, table2.relevant_columns):
8887
if c1 not in table1._schema:
8988
raise ValueError(f"Column '{c1}' not found in schema for table {table1}")
@@ -93,23 +92,23 @@ def _validate_and_adjust_columns(self, table1, table2):
9392
# Update schemas to minimal mutual precision
9493
col1 = table1._schema[c1]
9594
col2 = table2._schema[c2]
96-
if isinstance(col1, PrecisionType):
97-
if not isinstance(col2, PrecisionType):
95+
if isinstance(col1, PrecisionType) and isinstance(col2, PrecisionType):
96+
if strict and not isinstance(col2, PrecisionType):
9897
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
9998

100-
lowest = min(col1, col2, key=attrgetter("precision"))
99+
lowest = min(col1, col2, key=lambda col: col.precision)
101100

102101
if col1.precision != col2.precision:
103102
logger.warning(f"Using reduced precision {lowest} for column '{c1}'. Types={col1}, {col2}")
104103

105104
table1._schema[c1] = attrs.evolve(col1, precision=lowest.precision, rounds=lowest.rounds)
106105
table2._schema[c2] = attrs.evolve(col2, precision=lowest.precision, rounds=lowest.rounds)
107106

108-
elif isinstance(col1, (NumericType, Boolean)):
109-
if not isinstance(col2, (NumericType, Boolean)):
107+
elif isinstance(col1, (NumericType, Boolean)) and isinstance(col2, (NumericType, Boolean)):
108+
if strict and not isinstance(col2, (NumericType, Boolean)):
110109
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
111110

112-
lowest = min(col1, col2, key=attrgetter("precision"))
111+
lowest = min(col1, col2, key=lambda col: col.precision)
113112

114113
if col1.precision != col2.precision:
115114
logger.warning(f"Using reduced precision {lowest} for column '{c1}'. Types={col1}, {col2}")
@@ -120,11 +119,11 @@ def _validate_and_adjust_columns(self, table1, table2):
120119
table2._schema[c2] = attrs.evolve(col2, precision=lowest.precision)
121120

122121
elif isinstance(col1, ColType_UUID):
123-
if not isinstance(col2, ColType_UUID):
122+
if strict and not isinstance(col2, ColType_UUID):
124123
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
125124

126125
elif isinstance(col1, StringType):
127-
if not isinstance(col2, StringType):
126+
if strict and not isinstance(col2, StringType):
128127
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
129128

130129
for t in [table1, table2]:

0 commit comments

Comments
 (0)