@@ -71,7 +71,8 @@ class HashDiffer(TableDiffer):
71
71
"""
72
72
73
73
bisection_factor : int = DEFAULT_BISECTION_FACTOR
74
- bisection_threshold : Number = DEFAULT_BISECTION_THRESHOLD # Accepts inf for tests
74
+ bisection_threshold : int = DEFAULT_BISECTION_THRESHOLD
75
+ bisection_disabled : bool = False # i.e. always download the rows (used in tests)
75
76
76
77
stats : dict = attrs .field (factory = dict )
77
78
@@ -157,7 +158,7 @@ def _diff_segments(
157
158
# default, data-diff will checksum the section first (when it's below
158
159
# the threshold) and _then_ download it.
159
160
if BENCHMARK :
160
- if max_rows < self .bisection_threshold :
161
+ if self . bisection_disabled or max_rows < self .bisection_threshold :
161
162
return self ._bisect_and_diff_segments (ti , table1 , table2 , info_tree , level = level , max_rows = max_rows )
162
163
163
164
(count1 , checksum1 ), (count2 , checksum2 ) = self ._threaded_call ("count_and_checksum" , [table1 , table2 ])
@@ -202,7 +203,7 @@ def _bisect_and_diff_segments(
202
203
203
204
# If count is below the threshold, just download and compare the columns locally
204
205
# This saves time, as bisection speed is limited by ping and query performance.
205
- if max_rows < self .bisection_threshold or max_space_size < self .bisection_factor * 2 :
206
+ if self . bisection_disabled or max_rows < self .bisection_threshold or max_space_size < self .bisection_factor * 2 :
206
207
rows1 , rows2 = self ._threaded_call ("get_values" , [table1 , table2 ])
207
208
json_cols = {
208
209
i : colname
0 commit comments