@@ -6577,6 +6577,7 @@ static void llm_load_vocab(
6577
6577
|| t.first == "<end_of_turn>"
6578
6578
|| t.first == "<|endoftext|>"
6579
6579
|| t.first == "<EOT>"
6580
+ || t.first == "<|end▁of▁sentence|>" // DeepSeek
6580
6581
) {
6581
6582
vocab.special_eot_id = t.second;
6582
6583
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -6591,7 +6592,7 @@ static void llm_load_vocab(
6591
6592
if (vocab.special_eom_id == LLAMA_TOKEN_NULL) {
6592
6593
if (false
6593
6594
|| t.first == "<|eom_id|>"
6594
- ) {
6595
+ ) {
6595
6596
vocab.special_eom_id = t.second;
6596
6597
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
6597
6598
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
@@ -6604,9 +6605,11 @@ static void llm_load_vocab(
6604
6605
// find FIM_PRE token: "<|fim_prefix|>", "<fim-prefix>", "<PRE>", etc.
6605
6606
if (vocab.special_fim_pre_id == LLAMA_TOKEN_NULL) {
6606
6607
if (false
6607
- || t.first == "<|fim_prefix|>"
6608
+ || t.first == "<|fim_prefix|>" // Qwen
6608
6609
|| t.first == "<fim-prefix>"
6609
- || t.first == "<PRE>") {
6610
+ || t.first == "<|fim▁begin|>" // DeepSeek
6611
+ || t.first == "<PRE>"
6612
+ ) {
6610
6613
vocab.special_fim_pre_id = t.second;
6611
6614
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
6612
6615
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
@@ -6619,9 +6622,11 @@ static void llm_load_vocab(
6619
6622
// find FIM_SUF token: "<|fim_suffix|>", "<fim-suffix>", "<SUF>", etc.
6620
6623
if (vocab.special_fim_suf_id == LLAMA_TOKEN_NULL) {
6621
6624
if (false
6622
- || t.first == "<|fim_suffix|>"
6625
+ || t.first == "<|fim_suffix|>" // Qwen
6623
6626
|| t.first == "<fim-suffix>"
6624
- || t.first == "<SUF>") {
6627
+ || t.first == "<|fim▁hole|>" // DeepSeek
6628
+ || t.first == "<SUF>"
6629
+ ) {
6625
6630
vocab.special_fim_suf_id = t.second;
6626
6631
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
6627
6632
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
@@ -6634,9 +6639,11 @@ static void llm_load_vocab(
6634
6639
// find FIM_MID token: "<|fim_middle|>", "<fim-middle>", "<MID>", etc.
6635
6640
if (vocab.special_fim_mid_id == LLAMA_TOKEN_NULL) {
6636
6641
if (false
6637
- || t.first == "<|fim_middle|>"
6642
+ || t.first == "<|fim_middle|>" // Qwen
6638
6643
|| t.first == "<fim-middle>"
6639
- || t.first == "<MID>") {
6644
+ || t.first == "<|fim▁end|>" // DeepSeek
6645
+ || t.first == "<MID>"
6646
+ ) {
6640
6647
vocab.special_fim_mid_id = t.second;
6641
6648
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
6642
6649
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
@@ -6649,9 +6656,10 @@ static void llm_load_vocab(
6649
6656
// find FIM_PAD token: "<|fim_pad|>", "<fim-pad>", "<PAD>", etc.
6650
6657
if (vocab.special_fim_pad_id == LLAMA_TOKEN_NULL) {
6651
6658
if (false
6652
- || t.first == "<|fim_pad|>"
6659
+ || t.first == "<|fim_pad|>" // Qwen
6653
6660
|| t.first == "<fim-pad>"
6654
- || t.first == "<PAD>") {
6661
+ || t.first == "<PAD>"
6662
+ ) {
6655
6663
vocab.special_fim_pad_id = t.second;
6656
6664
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
6657
6665
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
@@ -6664,10 +6672,11 @@ static void llm_load_vocab(
6664
6672
// find FIM_REP token: "<|fim_repo|>", "<fim-repo>", "<REP>", etc.
6665
6673
if (vocab.special_fim_rep_id == LLAMA_TOKEN_NULL) {
6666
6674
if (false
6667
- || t.first == "<|fim_repo|>"
6675
+ || t.first == "<|fim_repo|>" // Qwen
6668
6676
|| t.first == "<|repo_name|>"
6669
6677
|| t.first == "<fim-repo>"
6670
- || t.first == "<REPO>") {
6678
+ || t.first == "<REPO>"
6679
+ ) {
6671
6680
vocab.special_fim_rep_id = t.second;
6672
6681
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
6673
6682
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
@@ -6680,7 +6689,8 @@ static void llm_load_vocab(
6680
6689
// find FIM_SEP token: "<|file_sep|>"
6681
6690
if (vocab.special_fim_sep_id == LLAMA_TOKEN_NULL) {
6682
6691
if (false
6683
- || t.first == "<|file_sep|>") {
6692
+ || t.first == "<|file_sep|>" // Qwen
6693
+ ) {
6684
6694
vocab.special_fim_sep_id = t.second;
6685
6695
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
6686
6696
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
@@ -19512,7 +19522,7 @@ struct llama_context * llama_new_context_with_model(
19512
19522
}
19513
19523
19514
19524
LLAMA_LOG_INFO("%s: KV self size = %7.2f MiB, K (%s): %7.2f MiB, V (%s): %7.2f MiB\n", __func__,
19515
- (float)(memory_size_k + memory_size_v) / (1024.0f * 1024.0f),
19525
+ (float)(memory_size_k + memory_size_v) / (1024.0f * 1024.0f),
19516
19526
ggml_type_name(type_k), (float)memory_size_k / (1024.0f * 1024.0f),
19517
19527
ggml_type_name(type_v), (float)memory_size_v / (1024.0f * 1024.0f));
19518
19528
}
0 commit comments