1 file changed: +3 -6 lines changed

@@ -441,9 +441,7 @@ def _set_vocab_sentencepiece(self):
         if vocab_size > len(tokens):
             pad_count = vocab_size - len(tokens)
-            print(
-                f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]"
-            )
+            logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
             for i in range(1, pad_count + 1):
                 tokens.append(f"[PAD{i}]")
                 scores.append(-1000.0)
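For reference, logger is not defined anywhere in the hunks shown; the diff assumes a module-level logger already exists. A minimal sketch of that assumed setup (the logger name is illustrative, not taken from this diff):

    import logging

    # Assumed module-level setup; the diff only shows the logger being used.
    logger = logging.getLogger("hf-to-gguf")  # name is an assumption

    # Unlike the old print(), debug output only appears when the user opts in:
    logging.basicConfig(level=logging.DEBUG)
    logger.debug("Padding vocab with 3 token(s) - [PAD1] through [PAD3]")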
@@ -2065,8 +2063,7 @@ def set_vocab(self):
         tokenizer_path = self.dir_model / 'tokenizer.model'

         if not tokenizer_path.is_file():
-            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
-            sys.exit(1)
+            raise ValueError(f'Error: Missing {tokenizer_path}')

         tokenizer = SentencePieceProcessor(str(tokenizer_path))
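The switch from sys.exit(1) to raise ValueError matters to callers: an exception can be caught and handled instead of killing the whole process. A sketch of what this enables, where the model variable is hypothetical and only set_vocab comes from the hunk above:

    import logging

    logger = logging.getLogger(__name__)

    try:
        model.set_vocab()  # 'model' is a hypothetical instance, for illustration only
    except ValueError as e:
        # The caller can log the failure and clean up or fall back,
        # instead of the process exiting unconditionally as with sys.exit(1).
        logger.error("failed to load tokenizer: %s", e)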
@@ -2104,7 +2101,7 @@ def set_vocab(self):
         for key in added_tokens_json:
             token_id = added_tokens_json[key]
             if (token_id >= vocab_size):
-                print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
                 continue

             tokens[token_id] = key.encode("utf-8")
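The same pattern applies here: out-of-range added-token ids are now reported at debug level and skipped rather than printed unconditionally. A self-contained sketch of that filtering behavior, with made-up vocab values purely for illustration:

    import logging

    logger = logging.getLogger(__name__)

    # Made-up values for illustration only.
    vocab_size = 32001
    added_tokens_json = {"<extra_0>": 32000, "<bogus>": 99999}
    tokens = [b""] * vocab_size

    for key in added_tokens_json:
        token_id = added_tokens_json[key]
        if token_id >= vocab_size:
            # Out-of-range ids are skipped with a debug message rather than aborting.
            logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
            continue
        tokens[token_id] = key.encode("utf-8")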