19
19
Native_UUID ,
20
20
String_UUID ,
21
21
String_Alphanum ,
22
- String_FixedAlphanum ,
23
22
String_VaryingAlphanum ,
24
23
TemporalType ,
25
24
UnknownColType ,
@@ -133,7 +132,7 @@ def query(self, sql_ast: Union[Expr, Generator], res_type: type = list):
133
132
for row in explain :
134
133
# Most returned a 1-tuple. Presto returns a string
135
134
if isinstance (row , tuple ):
136
- row , = row
135
+ ( row ,) = row
137
136
logger .debug ("EXPLAIN: %s" , row )
138
137
answer = input ("Continue? [y/n] " )
139
138
if not answer .lower () in ["y" , "yes" ]:
@@ -240,7 +239,7 @@ def _process_table_schema(
240
239
# Return a dict of form {name: type} after normalization
241
240
return col_dict
242
241
243
- def _refine_coltypes (self , table_path : DbPath , col_dict : Dict [str , ColType ], where : str = None ):
242
+ def _refine_coltypes (self , table_path : DbPath , col_dict : Dict [str , ColType ], where : str = None , sample_size = 32 ):
244
243
"""Refine the types in the column dict, by querying the database for a sample of their values
245
244
246
245
'where' restricts the rows to be sampled.
@@ -251,7 +250,7 @@ def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], whe
251
250
return
252
251
253
252
fields = [self .normalize_uuid (c , String_UUID ()) for c in text_columns ]
254
- samples_by_row = self .query (table (* table_path ).select (* fields ).where (where or SKIP ).limit (16 ), list )
253
+ samples_by_row = self .query (table (* table_path ).select (* fields ).where (where or SKIP ).limit (sample_size ), list )
255
254
if not samples_by_row :
256
255
raise ValueError (f"Table { table_path } is empty." )
257
256
@@ -279,13 +278,7 @@ def _refine_coltypes(self, table_path: DbPath, col_dict: Dict[str, ColType], whe
279
278
)
280
279
else :
281
280
assert col_name in col_dict
282
- lens = set (map (len , alphanum_samples ))
283
- if len (lens ) > 1 :
284
- col_dict [col_name ] = String_VaryingAlphanum ()
285
- else :
286
- (length ,) = lens
287
- col_dict [col_name ] = String_FixedAlphanum (length = length )
288
- continue
281
+ col_dict [col_name ] = String_VaryingAlphanum ()
289
282
290
283
# @lru_cache()
291
284
# def get_table_schema(self, path: DbPath) -> Dict[str, ColType]:
0 commit comments