Skip to content

Commit 5a98306

Browse files
committed
fix: Split MoE fused tensors for shared experts in conversion

Branch: GraniteMoEShared

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
1 parent 095a079 commit 5a98306

File tree

3 files changed

+19
-1
lines changed

3 files changed

+19
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5675,6 +5675,24 @@ def set_gguf_parameters(self):
56755675
self.gguf_writer.add_expert_shared_feed_forward_length(shared_feed_forward_length)
56765676
logger.info("gguf: (granitemoeshared) shared_feed_forward_length = %s", shared_feed_forward_length)
56775677

5678+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
    """Split the fused shared-expert projection into separate gate/up tensors.

    modeling_granitemoeshared uses the parallel-experts implementation, which
    fuses w1 (gate) and w3 (up) into one tensor of twice the hidden size and
    splits it at forward time. Existing shared-expert support in GGUF expects
    the two projections as distinct tensors, so they are pulled apart here.
    All other tensors are delegated to the parent class unchanged.
    """
    # Anything that is not the fused shared-MLP input projection passes through.
    if not name.endswith("shared_mlp.input_linear.weight"):
        return super().modify_tensors(data_torch, name, bid)

    ffn_dim = self.hparams["shared_intermediate_size"]
    # The fused tensor stacks gate then up along the second-to-last axis.
    assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size"
    gate_proj = data_torch[..., :ffn_dim, :]
    up_proj = data_torch[..., ffn_dim:, :]
    return [
        (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), gate_proj),
        (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), up_proj),
    ]
5695+
56785696

56795697
@ModelBase.register("BailingMoeForCausalLM")
56805698
class BailingMoeModel(TextModel):

gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,6 +1910,7 @@ class MODEL_TENSOR(IntEnum):
19101910
MODEL_TENSOR.FFN_GATE_EXP,
19111911
MODEL_TENSOR.FFN_DOWN_EXP,
19121912
MODEL_TENSOR.FFN_UP_EXP,
1913+
MODEL_TENSOR.FFN_GATE_SHEXP,
19131914
MODEL_TENSOR.FFN_UP_SHEXP,
19141915
MODEL_TENSOR.FFN_DOWN_SHEXP,
19151916
],

gguf-py/gguf/tensor_mapping.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,6 @@ class TensorNameMap:
346346
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
347347
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
348348
"language_model.model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
349-
"model.layers.{bid}.shared_mlp.input_linear", # granitemoeshared
350349
),
351350

352351
# AWQ-activation gate

0 commit comments

Comments
 (0)