Skip to content

Commit c7f00fc

Browse files
tokenization: add warning for double BOS
1 parent 9afdffe commit c7f00fc

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

llama.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12818,6 +12818,13 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
1281812818
}
1281912819
}
1282012820

12821+
if (add_special && vocab.special_add_bos != 0 && output[1] == vocab.special_bos_id) {
12822+
LLAMA_LOG_WARN(
12823+
"%s: Added a BOS token to the prompt as specified by the model but the prompt "
12824+
"also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
12825+
"Are you sure this is what you want?\n", __FUNCTION__);
12826+
}
12827+
1282112828
if (add_special && vocab.special_add_eos == 1) {
1282212829
GGML_ASSERT(vocab.special_eos_id != -1);
1282312830
output.push_back(vocab.special_eos_id);
@@ -12844,6 +12851,13 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
1284412851
}
1284512852
}
1284612853

12854+
if (add_special && vocab.special_add_bos != 0 && output[1] == vocab.special_bos_id) {
12855+
LLAMA_LOG_WARN(
12856+
"%s: Added a BOS token to the prompt as specified by the model but the prompt "
12857+
"also starts with a BOS token. So now the final prompt starts with 2 BOS tokens. "
12858+
"Are you sure this is what you want?\n", __FUNCTION__);
12859+
}
12860+
1284712861
if (add_special && vocab.special_add_eos == 1) {
1284812862
GGML_ASSERT(vocab.special_add_eos != -1);
1284912863
output.push_back(vocab.special_eos_id);

0 commit comments

Comments
 (0)