Commit 97de56d

fix: Fix the input to the shared experts
I had misread the architecture: the shared experts take the same input as the standard MoE layer (the hidden state _before_ the MoE), but I was feeding them the output of the MoE instead.

Branch: GraniteMoEShared
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent 97df181 commit 97de56d

src/llama-model.cpp

Lines changed: 5 additions & 3 deletions
@@ -4648,7 +4648,7 @@ struct llm_build_llama : public llm_graph_context {
                     LLM_NORM_RMS, il);
             cb(cur, "ffn_norm", il);
 
-            cur = build_moe_ffn(cur,
+            ggml_tensor * moe_out = build_moe_ffn(cur,
                     model.layers[il].ffn_gate_inp,
                     model.layers[il].ffn_up_exps,
                     model.layers[il].ffn_gate_exps,
@@ -4659,7 +4659,7 @@ struct llm_build_llama : public llm_graph_context {
                     false, 0.0,
                     LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
                     il);
-            cb(cur, "ffn_moe_out", il);
+            cb(moe_out, "ffn_moe_out", il);
 
             // For Granite MoE Shared
             if (model.arch == LLM_ARCH_GRANITE_MOE_SHARED) {
@@ -4671,8 +4671,10 @@ struct llm_build_llama : public llm_graph_context {
                         LLM_FFN_SILU, LLM_FFN_PAR, il);
                 cb(ffn_shexp, "ffn_shexp", il);
 
-                cur = ggml_add(ctx0, cur, ffn_shexp);
+                cur = ggml_add(ctx0, moe_out, ffn_shexp);
                 cb(cur, "ffn_out", il);
+            } else {
+                cur = moe_out;
             }
         }
 
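For clarity, the dataflow the commit establishes is: both the routed MoE experts and the always-active shared experts read the same post-norm hidden state, and their outputs are summed. The standalone C++ sketch below illustrates that wiring with hypothetical stand-ins (routed_moe, shared_experts, plain float vectors); it is not llama.cpp/ggml code, only a minimal before/after picture of the computation under those assumptions.

// Minimal standalone sketch (hypothetical helpers, NOT the ggml graph API).
#include <cstdio>
#include <vector>

using Tensor = std::vector<float>;

// Stand-in for the routed mixture-of-experts FFN (build_moe_ffn in the real code).
static Tensor routed_moe(const Tensor & x) {
    Tensor out(x.size());
    for (size_t i = 0; i < x.size(); ++i) out[i] = 2.0f * x[i];  // dummy transform
    return out;
}

// Stand-in for the always-active shared experts (the ffn_*_shexp FFN).
static Tensor shared_experts(const Tensor & x) {
    Tensor out(x.size());
    for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] + 1.0f;  // dummy transform
    return out;
}

int main() {
    Tensor ffn_inp = {1.0f, -0.5f, 0.25f};  // post-norm hidden state entering the FFN block

    // Before the fix: the shared experts were fed the MoE output.
    Tensor moe_old = routed_moe(ffn_inp);
    Tensor shexp_old = shared_experts(moe_old);       // shexp(moe(x)) -- wrong input
    Tensor before(ffn_inp.size());
    for (size_t i = 0; i < before.size(); ++i) before[i] = moe_old[i] + shexp_old[i];

    // After the fix: both branches read the same input; their outputs are summed.
    Tensor moe_out = routed_moe(ffn_inp);
    Tensor shexp   = shared_experts(ffn_inp);         // shexp(x) -- same input as the MoE
    Tensor after(ffn_inp.size());
    for (size_t i = 0; i < after.size(); ++i) after[i] = moe_out[i] + shexp[i];

    for (size_t i = 0; i < after.size(); ++i) {
        std::printf("before fix: %6.2f   after fix: %6.2f\n", before[i], after[i]);
    }
    return 0;
}

In the graph code this corresponds to leaving the normalized FFN input in cur, capturing the routed-expert result in moe_out, building ffn_shexp from that same input, and only then adding the two, with cur falling back to moe_out when the architecture has no shared experts.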