
Commit d3bd719

bozheng-hit authored
llama : Support Qwen3 and Qwen3MoE (#12828)
* add qwen3 & qwen3moe support.
* fix

Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
1 parent d9a63b2 commit d3bd719

5 files changed: +441 −0 lines

convert_hf_to_gguf.py

Lines changed: 10 additions & 0 deletions
@@ -2459,6 +2459,16 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@Model.register("Qwen3ForCausalLM")
+class Qwen3Model(Qwen2Model):
+    model_arch = gguf.MODEL_ARCH.QWEN3
+
+
+@Model.register("Qwen3MoeForCausalLM")
+class Qwen3MoeModel(Qwen2MoeModel):
+    model_arch = gguf.MODEL_ARCH.QWEN3MOE
+
+
 @Model.register("GPT2LMHeadModel")
 class GPT2Model(Model):
     model_arch = gguf.MODEL_ARCH.GPT2
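The converter keys its model classes to the architectures string found in a Hugging Face config.json, and by subclassing Qwen2Model and Qwen2MoeModel the two new classes inherit the existing Qwen2 weight-mapping logic, overriding only model_arch. A minimal, hypothetical sketch of this registry pattern (simplified from the real Model class, which carries far more state):

    # Illustrative sketch only, not the upstream implementation.
    class Model:
        _registry: dict[str, type] = {}
        model_arch = "unknown"

        @classmethod
        def register(cls, *names):
            # Decorator: map each HF "architectures" entry to a converter class.
            def wrap(model_cls):
                for name in names:
                    cls._registry[name] = model_cls
                return model_cls
            return wrap

        @classmethod
        def from_model_architecture(cls, name):
            return cls._registry[name]

    @Model.register("Qwen3ForCausalLM")
    class Qwen3Model(Model):
        model_arch = "qwen3"  # stands in for gguf.MODEL_ARCH.QWEN3

    assert Model.from_model_architecture("Qwen3ForCausalLM") is Qwen3Model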

gguf-py/gguf/constants.py

Lines changed: 38 additions & 0 deletions
@@ -248,6 +248,8 @@ class MODEL_ARCH(IntEnum):
     QWEN2    = auto()
     QWEN2MOE = auto()
     QWEN2VL  = auto()
+    QWEN3    = auto()
+    QWEN3MOE = auto()
     PHI2     = auto()
     PHI3     = auto()
     PHIMOE   = auto()
@@ -453,6 +455,8 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.QWEN2:    "qwen2",
     MODEL_ARCH.QWEN2MOE: "qwen2moe",
     MODEL_ARCH.QWEN2VL:  "qwen2vl",
+    MODEL_ARCH.QWEN3:    "qwen3",
+    MODEL_ARCH.QWEN3MOE: "qwen3moe",
     MODEL_ARCH.PHI2:     "phi2",
     MODEL_ARCH.PHI3:     "phi3",
     MODEL_ARCH.PHIMOE:   "phimoe",
@@ -953,6 +957,40 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
     ],
+    MODEL_ARCH.QWEN3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN3MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
     MODEL_ARCH.PLAMO: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
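These tables are what gguf-py consults when writing a model: MODEL_ARCH_NAMES supplies the general.architecture string for the GGUF header, and the per-arch MODEL_TENSORS list gates which tensor kinds the writer accepts. A quick check of the new entries (assumes the gguf-py package from this tree is importable; note ATTN_Q_NORM and ATTN_K_NORM, which this commit's Qwen3 list adds relative to Qwen2's):

    import gguf

    # Architecture string stored as general.architecture in the GGUF header.
    print(gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.QWEN3])  # -> "qwen3"

    # Tensor kinds permitted for Qwen3, per the table added in this commit.
    for t in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.QWEN3]:
        print(t.name)  # TOKEN_EMBD, ..., ATTN_Q_NORM, ATTN_K_NORM, ...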

src/llama-arch.cpp

Lines changed: 41 additions & 0 deletions
@@ -26,6 +26,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_QWEN2,    "qwen2"    },
     { LLM_ARCH_QWEN2MOE, "qwen2moe" },
     { LLM_ARCH_QWEN2VL,  "qwen2vl"  },
+    { LLM_ARCH_QWEN3,    "qwen3"    },
+    { LLM_ARCH_QWEN3MOE, "qwen3moe" },
     { LLM_ARCH_PHI2,     "phi2"     },
     { LLM_ARCH_PHI3,     "phi3"     },
     { LLM_ARCH_PHIMOE,   "phimoe"   },
@@ -595,6 +597,45 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_SHEXP,   "blk.%d.ffn_up_shexp" },
         },
     },
+    {
+        LLM_ARCH_QWEN3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,    "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,    "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_QWEN3MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,    "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,    "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP,   "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,  "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,  "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_PHI2,
         {
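The "%d" in each per-block template is filled with the layer index when llama.cpp resolves tensor names at load time, with a ".weight" (or ".bias") suffix appended. A rough Python rendering of that expansion for one Qwen3 block (the real logic lives in llama.cpp's C++ name-formatting helpers; the suffix handling here is simplified):

    # Per-block templates copied from the LLM_ARCH_QWEN3 table above.
    QWEN3_BLOCK_TEMPLATES = [
        "blk.%d.attn_norm", "blk.%d.attn_q", "blk.%d.attn_q_norm",
        "blk.%d.attn_k", "blk.%d.attn_k_norm", "blk.%d.attn_v",
        "blk.%d.attn_output", "blk.%d.ffn_norm", "blk.%d.ffn_gate",
        "blk.%d.ffn_down", "blk.%d.ffn_up",
    ]

    def block_tensor_names(layer: int) -> list[str]:
        # Assumes plain weight tensors; llama.cpp also handles bias suffixes.
        return [tmpl % layer + ".weight" for tmpl in QWEN3_BLOCK_TEMPLATES]

    print(block_tensor_names(0))  # ['blk.0.attn_norm.weight', 'blk.0.attn_q.weight', ...]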

src/llama-arch.h

Lines changed: 2 additions & 0 deletions
@@ -30,6 +30,8 @@ enum llm_arch {
     LLM_ARCH_QWEN2,
     LLM_ARCH_QWEN2MOE,
     LLM_ARCH_QWEN2VL,
+    LLM_ARCH_QWEN3,
+    LLM_ARCH_QWEN3MOE,
     LLM_ARCH_PHI2,
     LLM_ARCH_PHI3,
     LLM_ARCH_PHIMOE,
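At load time llama.cpp maps the general.architecture string from the GGUF header back onto these enum values by scanning LLM_ARCH_NAMES (llm_arch_from_string in src/llama-arch.cpp). A hypothetical Python rendering of that reverse lookup:

    # Mirrors an excerpt of the enum -> name table from src/llama-arch.cpp.
    LLM_ARCH_NAMES = {
        "LLM_ARCH_QWEN3":    "qwen3",
        "LLM_ARCH_QWEN3MOE": "qwen3moe",
    }

    def llm_arch_from_string(name: str) -> str:
        # Linear scan, as in the C++ original; unknown names fall through.
        for arch, arch_name in LLM_ARCH_NAMES.items():
            if arch_name == name:
                return arch
        return "LLM_ARCH_UNKNOWN"

    assert llm_arch_from_string("qwen3moe") == "LLM_ARCH_QWEN3MOE"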
