Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 3cdc103

Browse files
authored
Merge pull request #254 from datafold/issue252
Deprecate use of FixedAlphanum
2 parents d181274 + 68e6228 commit 3cdc103

File tree

1 file changed

+4
-11
lines changed

1 file changed

+4
-11
lines changed

data_diff/databases/base.py

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@
1919
Native_UUID,
2020
String_UUID,
2121
String_Alphanum,
22-
String_FixedAlphanum,
2322
String_VaryingAlphanum,
2423
TemporalType,
2524
UnknownColType,
@@ -133,7 +132,7 @@ def query(self, sql_ast: Union[Expr, Generator], res_type: type = list):
133132
for row in explain:
134133
# Most returned a 1-tuple. Presto returns a string
135134
if isinstance(row, tuple):
136-
row ,= row
135+
(row,) = row
137136
logger.debug("EXPLAIN: %s", row)
138137
answer = input("Continue? [y/n] ")
139138
if not answer.lower() in ["y", "yes"]:
@@ -240,7 +239,7 @@ def _process_table_schema(
240239
# Return a dict of form {name: type} after normalization
241240
return col_dict
242241

243-
def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], where: str = None):
242+
def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], where: str = None, sample_size=32):
244243
"""Refine the types in the column dict, by querying the database for a sample of their values
245244
246245
'where' restricts the rows to be sampled.
@@ -251,7 +250,7 @@ def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], whe
251250
return
252251

253252
fields = [self.normalize_uuid(c, String_UUID()) for c in text_columns]
254-
samples_by_row = self.query(table(*table_path).select(*fields).where(where or SKIP).limit(16), list)
253+
samples_by_row = self.query(table(*table_path).select(*fields).where(where or SKIP).limit(sample_size), list)
255254
if not samples_by_row:
256255
raise ValueError(f"Table {table_path} is empty.")
257256

@@ -279,13 +278,7 @@ def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], whe
279278
)
280279
else:
281280
assert col_name in col_dict
282-
lens = set(map(len, alphanum_samples))
283-
if len(lens) > 1:
284-
col_dict[col_name] = String_VaryingAlphanum()
285-
else:
286-
(length,) = lens
287-
col_dict[col_name] = String_FixedAlphanum(length=length)
288-
continue
281+
col_dict[col_name] = String_VaryingAlphanum()
289282

290283
# @lru_cache()
291284
# def get_table_schema(self, path: DbPath) -> Dict[str, ColType]:

0 commit comments

Comments
 (0)