
Commit 17e4d6c

mamba : rename metadata to be more similar to transformers library

This breaks existing converted-to-GGUF models, but the metadata names are more "standard".

mamba : support mamba-*-hf models

These models share their token_embd.weight with their output.weight.
1 parent d8024a4 commit 17e4d6c

5 files changed, +45 -37 lines changed


convert-hf-to-gguf.py

Lines changed: 4 additions & 4 deletions
@@ -1903,10 +1903,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
         self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
-        self.gguf_writer.add_ssm_conv_kernel_size(d_conv)
-        self.gguf_writer.add_ssm_inner_length(d_inner)
-        self.gguf_writer.add_ssm_state_length(d_state)
-        self.gguf_writer.add_ssm_dt_rank(dt_rank)
+        self.gguf_writer.add_ssm_conv_kernel(d_conv)
+        self.gguf_writer.add_ssm_inner_size(d_inner)
+        self.gguf_writer.add_ssm_state_size(d_state)
+        self.gguf_writer.add_ssm_time_step_rank(dt_rank)
         self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
         self.gguf_writer.add_file_type(self.ftype)
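The d_conv, d_inner, d_state, and dt_rank values passed above are derived from the model's config.json earlier in set_gguf_parameters. A hedged sketch of how such values are typically obtained, using the well-known Mamba defaults; the exact field names and fallbacks below are assumptions for illustration, not the converter's code:

# Hypothetical derivation of the SSM hyperparameters from a
# transformers-style config.json; field names are assumptions.
import json

with open("config.json") as f:  # placeholder path
    hparams = json.load(f)

d_model = hparams["d_model"]                             # embedding width
d_conv  = hparams.get("conv_kernel", 4)                  # conv kernel size
d_inner = hparams.get("intermediate_size", 2 * d_model)  # expansion factor 2
d_state = hparams.get("state_size", 16)                  # SSM state size
dt_rank = hparams.get("time_step_rank", -(d_model // -16))  # ceil(d_model / 16)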

gguf-py/gguf/constants.py

Lines changed: 8 additions & 8 deletions
@@ -62,10 +62,10 @@ class Rope:
         SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
 
     class SSM:
-        CONV_KERNEL_SIZE = "{arch}.ssm.d_conv"
-        INNER_LENGTH = "{arch}.ssm.d_inner"
-        STATE_LENGTH = "{arch}.ssm.d_state"
-        DT_RANK = "{arch}.ssm.dt_rank"
+        CONV_KERNEL = "{arch}.ssm.conv_kernel"
+        INNER_SIZE = "{arch}.ssm.inner_size"
+        STATE_SIZE = "{arch}.ssm.state_size"
+        TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
 
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"

@@ -770,10 +770,10 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED
 
 # SSM
-KEY_SSM_CONV_KERNEL_SIZE = Keys.SSM.CONV_KERNEL_SIZE
-KEY_SSM_INNER_LENGTH = Keys.SSM.INNER_LENGTH
-KEY_SSM_STATE_LENGTH = Keys.SSM.STATE_LENGTH
-KEY_SSM_DT_RANK = Keys.SSM.DT_RANK
+KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
+KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
+KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
+KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
 
 # tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
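Since these are ordinary key strings in the GGUF header, a converted file can be spot-checked for the new names. A minimal sketch, assuming the gguf-py package from this tree; the file name is a placeholder, and the scalar-field decoding follows GGUFReader's parts/data layout:

# Hedged sketch: print the renamed SSM keys from a converted file
# ("mamba-130m.gguf" is a placeholder path).
from gguf import GGUFReader

reader = GGUFReader("mamba-130m.gguf")
for key in ("mamba.ssm.conv_kernel", "mamba.ssm.inner_size",
            "mamba.ssm.state_size", "mamba.ssm.time_step_rank"):
    field = reader.fields.get(key)
    # for scalar fields, the value lives in the part indexed by data[0]
    print(key, "=", None if field is None else field.parts[field.data[0]][0])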

gguf-py/gguf/gguf_writer.py

Lines changed: 8 additions & 8 deletions
@@ -382,17 +382,17 @@ def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
     def add_rope_scaling_finetuned(self, value: bool) -> None:
         self.add_bool(Keys.Rope.SCALING_FINETUNED.format(arch=self.arch), value)
 
-    def add_ssm_conv_kernel_size(self, value: int) -> None:
-        self.add_uint32(Keys.SSM.CONV_KERNEL_SIZE.format(arch=self.arch), value)
+    def add_ssm_conv_kernel(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.CONV_KERNEL.format(arch=self.arch), value)
 
-    def add_ssm_inner_length(self, value: int) -> None:
-        self.add_uint32(Keys.SSM.INNER_LENGTH.format(arch=self.arch), value)
+    def add_ssm_inner_size(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.INNER_SIZE.format(arch=self.arch), value)
 
-    def add_ssm_state_length(self, value: int) -> None:
-        self.add_uint32(Keys.SSM.STATE_LENGTH.format(arch=self.arch), value)
+    def add_ssm_state_size(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.STATE_SIZE.format(arch=self.arch), value)
 
-    def add_ssm_dt_rank(self, value: int) -> None:
-        self.add_uint32(Keys.SSM.DT_RANK.format(arch=self.arch), value)
+    def add_ssm_time_step_rank(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
 
     def add_tokenizer_model(self, model: str) -> None:
         self.add_string(Keys.Tokenizer.MODEL, model)
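For downstream converters the rename is only a method-name change; the values and their uint32 encoding are untouched. A minimal sketch of the renamed calls, assuming gguf-py from this tree (hyperparameter values are illustrative, and a real conversion would also write tensors and the remaining metadata):

# Illustrative calls to the renamed writer methods; values are placeholders.
import gguf

writer = gguf.GGUFWriter("mamba-out.gguf", arch="mamba")
writer.add_ssm_conv_kernel(4)       # was add_ssm_conv_kernel_size
writer.add_ssm_inner_size(1536)     # was add_ssm_inner_length
writer.add_ssm_state_size(16)       # was add_ssm_state_length
writer.add_ssm_time_step_rank(48)   # was add_ssm_dt_rank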

gguf-py/gguf/tensor_mapping.py

Lines changed: 4 additions & 3 deletions
@@ -20,8 +20,9 @@ class TensorNameMap:
             "wte",                  # gpt2
             "transformer.embd.wte", # phi2
             "model.tok_embeddings", # internlm2
-            "model.embedding",      # mamba
+            "model.embedding",      # mamba-qbert
             "backbone.embedding",   # mamba
+            "backbone.embeddings",  # mamba-hf
         ),
 
         # Token type embeddings

@@ -63,7 +64,7 @@ class TensorNameMap:
             "language_model.encoder.final_layernorm", # persimmon
             "model.final_layernorm",                  # persimmon
             "lm_head.ln",                             # phi2
-            "model.norm_f",                           # mamba
+            "model.norm_f",                           # mamba-qbert
             "backbone.norm_f",                        # mamba
         ),

@@ -90,7 +91,7 @@ class TensorNameMap:
             "transformer.h.{bid}.ln",            # phi2
             "model.layers.layers.{bid}.norm",    # plamo
             "model.layers.{bid}.attention_norm", # internlm2
-            "model.layers.{bid}.norm",           # mamba
+            "model.layers.{bid}.norm",           # mamba-qbert
             "backbone.layers.{bid}.norm",        # mamba
         ),
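The new "backbone.embeddings" entry is what lets mamba-*-hf checkpoints resolve to the same GGUF tensor names as the original checkpoints. A minimal sketch of the lookup, assuming gguf-py from this tree; the block count is illustrative:

# Resolve a mamba-hf tensor name through the updated mapping
# (block count 24 is illustrative).
import gguf

tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MAMBA, 24)
print(tmap.get_name("backbone.embeddings.weight", try_suffixes=(".weight",)))
# expected output: token_embd.weight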

llama.cpp

Lines changed: 21 additions & 14 deletions
@@ -286,10 +286,10 @@ enum llm_kv {
     LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
     LLM_KV_ROPE_SCALING_FINETUNED,
 
-    LLM_KV_SSM_D_INNER,
-    LLM_KV_SSM_D_CONV,
-    LLM_KV_SSM_D_STATE,
-    LLM_KV_SSM_DT_RANK,
+    LLM_KV_SSM_INNER_SIZE,
+    LLM_KV_SSM_CONV_KERNEL,
+    LLM_KV_SSM_STATE_SIZE,
+    LLM_KV_SSM_TIME_STEP_RANK,
 
     LLM_KV_TOKENIZER_MODEL,
     LLM_KV_TOKENIZER_LIST,

@@ -349,10 +349,10 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, "%s.rope.scaling.original_context_length" },
     { LLM_KV_ROPE_SCALING_FINETUNED,    "%s.rope.scaling.finetuned"               },
 
-    { LLM_KV_SSM_D_CONV,  "%s.ssm.d_conv"  },
-    { LLM_KV_SSM_D_INNER, "%s.ssm.d_inner" },
-    { LLM_KV_SSM_D_STATE, "%s.ssm.d_state" },
-    { LLM_KV_SSM_DT_RANK, "%s.ssm.dt_rank" },
+    { LLM_KV_SSM_CONV_KERNEL,    "%s.ssm.conv_kernel"    },
+    { LLM_KV_SSM_INNER_SIZE,     "%s.ssm.inner_size"     },
+    { LLM_KV_SSM_STATE_SIZE,     "%s.ssm.state_size"     },
+    { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
 
     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
     { LLM_KV_TOKENIZER_LIST,  "tokenizer.ggml.tokens" },

@@ -3599,10 +3599,10 @@ static void llm_load_hparams(
             } break;
         case LLM_ARCH_MAMBA:
             {
-                ml.get_key(LLM_KV_SSM_D_CONV,  hparams.ssm_d_conv);
-                ml.get_key(LLM_KV_SSM_D_INNER, hparams.ssm_d_inner);
-                ml.get_key(LLM_KV_SSM_D_STATE, hparams.ssm_d_state);
-                ml.get_key(LLM_KV_SSM_DT_RANK, hparams.ssm_dt_rank);
+                ml.get_key(LLM_KV_SSM_CONV_KERNEL,    hparams.ssm_d_conv);
+                ml.get_key(LLM_KV_SSM_INNER_SIZE,     hparams.ssm_d_inner);
+                ml.get_key(LLM_KV_SSM_STATE_SIZE,     hparams.ssm_d_state);
+                ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank);
 
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

@@ -4864,8 +4864,15 @@ static bool llm_load_tensors(
 
                 // output
                 {
-                    model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
-                    model.output      = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
+                    model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
+
+                    model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, false);
+                    // if output is NULL, init from the input tok embed, duplicated to allow offloading
+                    if (model.output == NULL) {
+                        model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+                        ml.n_created--; // artificial tensor
+                        ml.size_data += ggml_nbytes(model.output);
+                    }
                 }
 
                 for (int i = 0; i < n_layer; ++i) {
