
Commit 91a3530

llama : refactor wavtokenizer tensors

ggml-ci

1 parent b6c9e70

8 files changed: +394 -509 lines
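In short, this commit unifies the WavTokenizer tensor naming (pos_net -> posnet, conv_next -> convnext), moves the posnet/convnext hyperparameters out of flat keys into nested GGUF namespaces with separate embedding-length and block-count keys, and, on the side, adds a --sampler-seq alias plus sampler logging in the TTS example.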

common/arg.cpp

Lines changed: 1 addition & 1 deletion

@@ -842,7 +842,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_sparam());
     add_opt(common_arg(
-        {"--sampling-seq"}, "SEQUENCE",
+        {"--sampling-seq", "--sampler-seq"}, "SEQUENCE",
         string_format("simplified sequence for samplers that will be used (default: %s)", sampler_type_chars.c_str()),
         [](common_params & params, const std::string & value) {
             params.sampling.samplers = common_sampler_types_from_chars(value);
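The only functional change here is the new --sampler-seq alias: both spellings now select the same sampler-sequence option, so existing scripts that pass --sampling-seq keep working unchanged.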

convert_hf_to_gguf.py

Lines changed: 8 additions & 4 deletions

@@ -326,8 +326,8 @@ def prepare_tensors(self):
                     gguf.MODEL_TENSOR.TIME_MIX_W2,
                     gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
                     gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
-                    gguf.MODEL_TENSOR.POS_NET_NORM1,
-                    gguf.MODEL_TENSOR.POS_NET_NORM2,
+                    gguf.MODEL_TENSOR.POSNET_NORM1,
+                    gguf.MODEL_TENSOR.POSNET_NORM2,
                 )
             )
             or not new_name.endswith(".weight")
@@ -2059,12 +2059,16 @@ def set_gguf_parameters(self):
         super().set_gguf_parameters()
         self.gguf_writer.add_vocab_size         (self.hparams["vocab_size"])
         self.gguf_writer.add_features_length    (self.hparams["n_embd_features"])
-        self.gguf_writer.add_posnet_length      (self.hparams["n_embd_posnet"])
-        self.gguf_writer.add_convnext_length    (self.hparams["n_embd_convnext"])
         self.gguf_writer.add_feed_forward_length(self.hparams["n_ff"])
         self.gguf_writer.add_group_norm_eps     (self.hparams["group_norm_epsilon"])
         self.gguf_writer.add_group_norm_groups  (self.hparams["group_norm_groups"])
 
+        self.gguf_writer.add_posnet_embedding_length(self.hparams["posnet"]["n_embd"])
+        self.gguf_writer.add_posnet_block_count     (self.hparams["posnet"]["n_layer"])
+
+        self.gguf_writer.add_convnext_embedding_length(self.hparams["convnext"]["n_embd"])
+        self.gguf_writer.add_convnext_block_count     (self.hparams["convnext"]["n_layer"])
+
 
 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):
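Note on the hparams change: the posnet and convnext sizes now come from nested objects in config.json instead of the flat n_embd_posnet / n_embd_convnext keys. A minimal sketch of the shape the converter expects, with the values that examples/tts/convert_pt_to_hf.py writes in this commit (the standalone dict here is illustrative, not converter code):

    hparams = {
        "vocab_size":      4096,
        "n_embd_features": 512,
        "n_ff":            2304,
        "posnet":   {"n_embd": 768, "n_layer": 6},   # was: "n_embd_posnet": 768
        "convnext": {"n_embd": 768, "n_layer": 12},  # was: "n_embd_convnext": 768
    }

    # the nested lookups used by set_gguf_parameters above:
    assert hparams["posnet"]["n_embd"] == 768
    assert hparams["convnext"]["n_layer"] == 12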

examples/tts/convert_pt_to_hf.py

Lines changed: 19 additions & 8 deletions

@@ -74,12 +74,13 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
     new_key = key
 
     new_key = new_key.replace('state_dict.', '')
+    new_key = new_key.replace('pos_net', 'posnet')
 
-    # check if matches "backbone.pos_net.%d.bias" or "backbone.pos_net.%d.weight"
-    if new_key.startswith("backbone.pos_net."):
-        match = re.match(r"backbone\.pos_net\.(\d+)\.(bias|weight)", new_key)
+    # check if matches "backbone.posnet.%d.bias" or "backbone.posnet.%d.weight"
+    if new_key.startswith("backbone.posnet."):
+        match = re.match(r"backbone\.posnet\.(\d+)\.(bias|weight)", new_key)
         if match:
-            new_key = f"backbone.pos_net.{match.group(1)}.norm.{match.group(2)}"
+            new_key = f"backbone.posnet.{match.group(1)}.norm.{match.group(2)}"
 
     # "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed" -> "backbone.embedding.weight"
     if new_key == "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed":
@@ -99,7 +100,7 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
     new_key = new_key.replace("gamma", "gamma.weight")
 
     # convert from 1D [768] to 2D [768, 1] so that ggml_add can broadcast the bias
-    if (new_key.endswith("norm.weight") or new_key.endswith("norm1.weight") or new_key.endswith("norm2.weight") or new_key.endswith(".bias")) and (new_key.startswith("backbone.pos_net") or new_key.startswith("backbone.embed.bias")):
+    if (new_key.endswith("norm.weight") or new_key.endswith("norm1.weight") or new_key.endswith("norm2.weight") or new_key.endswith(".bias")) and (new_key.startswith("backbone.posnet") or new_key.startswith("backbone.embed.bias")):
         value = value.unsqueeze(1)
 
     if new_key.endswith("dwconv.bias"):
@@ -155,16 +156,26 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
     ],
     "hidden_size": 1282,
     "n_embd_features": 512,
-    "n_embd_posnet": 768,
-    "n_embd_convnext": 768,
     "n_ff": 2304,
     "vocab_size": 4096,
     "n_head": 1,
     "layer_norm_epsilon": 1e-6,
     "group_norm_epsilon": 1e-6,
     "group_norm_groups": 32,
     "max_position_embeddings": 8192, # ?
-    "num_hidden_layers": 12
+    "n_layer": 12,
+    "posnet": {
+        "n_embd": 768,
+        "n_layer": 6
+    },
+    "convnext": {
+        "n_embd": 768,
+        "n_layer": 12
+    },
+    #"n_embd_posnet": 768,
+    #"n_embd_convnext": 768,
+    #"n_layer_posnet": 6,
+    #"n_layer_convnext": 12
 }
 
 with open(path_dst + '/config.json', 'w') as f:
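The key rewriting above reduces to a small pure function; a standalone sketch for reference (the helper name is ours, not part of the script):

    import re

    def canonicalize_posnet_key(key: str) -> str:
        # unify the old 'pos_net' spelling to 'posnet'
        key = key.replace('pos_net', 'posnet')
        # bare per-block "%d.bias" / "%d.weight" entries belong to the block's norm
        match = re.match(r"backbone\.posnet\.(\d+)\.(bias|weight)", key)
        if match:
            key = f"backbone.posnet.{match.group(1)}.norm.{match.group(2)}"
        return key

    assert canonicalize_posnet_key("backbone.pos_net.3.bias") == "backbone.posnet.3.norm.bias"
    assert canonicalize_posnet_key("backbone.pos_net.3.conv1.weight") == "backbone.posnet.3.conv1.weight"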

examples/tts/tts.cpp

Lines changed: 4 additions & 0 deletions

@@ -476,6 +476,10 @@ int main(int argc, char ** argv) {
         smpl[i] = common_sampler_init(model_ttc, params.sampling);
     }
 
+    LOG_INF("sampler seed: %u\n", common_sampler_get_seed(smpl[0]));
+    LOG_INF("sampler params: \n%s\n", params.sampling.print().c_str());
+    LOG_INF("sampler chain: %s\n", common_sampler_print(smpl[0]).c_str());
+
     LOG_INF("%s: loading done\n", __func__);
 
     const auto t_main_start = ggml_time_us();
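These three lines are purely diagnostic: before generation starts, the TTS example now logs the sampler seed, the sampling parameters, and the sampler chain of the first sampler, matching the kind of startup logging used elsewhere in the examples.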

gguf-py/gguf/constants.py

Lines changed: 53 additions & 47 deletions

@@ -91,8 +91,6 @@ class LLM:
         CONTEXT_LENGTH            = "{arch}.context_length"
         EMBEDDING_LENGTH          = "{arch}.embedding_length"
         FEATURES_LENGTH           = "{arch}.features_length"
-        POSNET_LENGTH             = "{arch}.posnet_length"
-        CONVNEXT_LENGTH           = "{arch}.convnext_length"
         BLOCK_COUNT               = "{arch}.block_count"
         LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
         FEED_FORWARD_LENGTH       = "{arch}.feed_forward_length"
@@ -160,6 +158,14 @@ class SSM:
     class WKV:
         HEAD_SIZE = "{arch}.wkv.head_size"
 
+    class PosNet:
+        EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
+        BLOCK_COUNT      = "{arch}.posnet.block_count"
+
+    class ConvNext:
+        EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
+        BLOCK_COUNT      = "{arch}.convnext.block_count"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE = "tokenizer.ggml.pre"
@@ -377,21 +383,21 @@ class MODEL_TENSOR(IntEnum):
     CLS               = auto() # classifier
     CLS_OUT           = auto() # classifier output projection
     CONV1D            = auto()
-    CONV_NEXT_DW      = auto()
-    CONV_NEXT_NORM    = auto()
-    CONV_NEXT_PW1     = auto()
-    CONV_NEXT_PW2     = auto()
-    CONV_NEXT_GAMMA   = auto()
-    POS_NET_CONV1     = auto()
-    POS_NET_CONV2     = auto()
-    POS_NET_NORM      = auto()
-    POS_NET_NORM1     = auto()
-    POS_NET_NORM2     = auto()
-    POS_NET_ATTN_NORM = auto()
-    POS_NET_ATTN_Q    = auto()
-    POS_NET_ATTN_K    = auto()
-    POS_NET_ATTN_V    = auto()
-    POS_NET_ATTN_OUT  = auto()
+    CONVNEXT_DW       = auto()
+    CONVNEXT_NORM     = auto()
+    CONVNEXT_PW1      = auto()
+    CONVNEXT_PW2      = auto()
+    CONVNEXT_GAMMA    = auto()
+    POSNET_CONV1      = auto()
+    POSNET_CONV2      = auto()
+    POSNET_NORM       = auto()
+    POSNET_NORM1      = auto()
+    POSNET_NORM2      = auto()
+    POSNET_ATTN_NORM  = auto()
+    POSNET_ATTN_Q     = auto()
+    POSNET_ATTN_K     = auto()
+    POSNET_ATTN_V     = auto()
+    POSNET_ATTN_OUT   = auto()
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -558,21 +564,21 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.CLS:               "cls",
     MODEL_TENSOR.CLS_OUT:           "cls.output",
     MODEL_TENSOR.CONV1D:            "conv1d",
-    MODEL_TENSOR.CONV_NEXT_DW:      "conv_next.{bid}.dw",
-    MODEL_TENSOR.CONV_NEXT_NORM:    "conv_next.{bid}.norm",
-    MODEL_TENSOR.CONV_NEXT_PW1:     "conv_next.{bid}.pw1",
-    MODEL_TENSOR.CONV_NEXT_PW2:     "conv_next.{bid}.pw2",
-    MODEL_TENSOR.CONV_NEXT_GAMMA:   "conv_next.{bid}.gamma",
-    MODEL_TENSOR.POS_NET_CONV1:     "pos_net.{bid}.conv1",
-    MODEL_TENSOR.POS_NET_CONV2:     "pos_net.{bid}.conv2",
-    MODEL_TENSOR.POS_NET_NORM:      "pos_net.{bid}.norm",
-    MODEL_TENSOR.POS_NET_NORM1:     "pos_net.{bid}.norm1",
-    MODEL_TENSOR.POS_NET_NORM2:     "pos_net.{bid}.norm2",
-    MODEL_TENSOR.POS_NET_ATTN_NORM: "pos_net.{bid}.attn_norm",
-    MODEL_TENSOR.POS_NET_ATTN_Q:    "pos_net.{bid}.attn_q",
-    MODEL_TENSOR.POS_NET_ATTN_K:    "pos_net.{bid}.attn_k",
-    MODEL_TENSOR.POS_NET_ATTN_V:    "pos_net.{bid}.attn_v",
-    MODEL_TENSOR.POS_NET_ATTN_OUT:  "pos_net.{bid}.attn_output",
+    MODEL_TENSOR.CONVNEXT_DW:       "convnext.{bid}.dw",
+    MODEL_TENSOR.CONVNEXT_NORM:     "convnext.{bid}.norm",
+    MODEL_TENSOR.CONVNEXT_PW1:      "convnext.{bid}.pw1",
+    MODEL_TENSOR.CONVNEXT_PW2:      "convnext.{bid}.pw2",
+    MODEL_TENSOR.CONVNEXT_GAMMA:    "convnext.{bid}.gamma",
+    MODEL_TENSOR.POSNET_CONV1:      "posnet.{bid}.conv1",
+    MODEL_TENSOR.POSNET_CONV2:      "posnet.{bid}.conv2",
+    MODEL_TENSOR.POSNET_NORM:       "posnet.{bid}.norm",
+    MODEL_TENSOR.POSNET_NORM1:      "posnet.{bid}.norm1",
+    MODEL_TENSOR.POSNET_NORM2:      "posnet.{bid}.norm2",
+    MODEL_TENSOR.POSNET_ATTN_NORM:  "posnet.{bid}.attn_norm",
+    MODEL_TENSOR.POSNET_ATTN_Q:     "posnet.{bid}.attn_q",
+    MODEL_TENSOR.POSNET_ATTN_K:     "posnet.{bid}.attn_k",
+    MODEL_TENSOR.POSNET_ATTN_V:     "posnet.{bid}.attn_v",
+    MODEL_TENSOR.POSNET_ATTN_OUT:   "posnet.{bid}.attn_output",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -1415,23 +1421,23 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.TOKEN_EMBD_NORM,
         MODEL_TENSOR.CONV1D,
-        MODEL_TENSOR.CONV_NEXT_DW,
-        MODEL_TENSOR.CONV_NEXT_NORM,
-        MODEL_TENSOR.CONV_NEXT_PW1,
-        MODEL_TENSOR.CONV_NEXT_PW2,
-        MODEL_TENSOR.CONV_NEXT_GAMMA,
+        MODEL_TENSOR.CONVNEXT_DW,
+        MODEL_TENSOR.CONVNEXT_NORM,
+        MODEL_TENSOR.CONVNEXT_PW1,
+        MODEL_TENSOR.CONVNEXT_PW2,
+        MODEL_TENSOR.CONVNEXT_GAMMA,
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
-        MODEL_TENSOR.POS_NET_CONV1,
-        MODEL_TENSOR.POS_NET_CONV2,
-        MODEL_TENSOR.POS_NET_NORM,
-        MODEL_TENSOR.POS_NET_NORM1,
-        MODEL_TENSOR.POS_NET_NORM2,
-        MODEL_TENSOR.POS_NET_ATTN_NORM,
-        MODEL_TENSOR.POS_NET_ATTN_Q,
-        MODEL_TENSOR.POS_NET_ATTN_K,
-        MODEL_TENSOR.POS_NET_ATTN_V,
-        MODEL_TENSOR.POS_NET_ATTN_OUT,
+        MODEL_TENSOR.POSNET_CONV1,
+        MODEL_TENSOR.POSNET_CONV2,
+        MODEL_TENSOR.POSNET_NORM,
+        MODEL_TENSOR.POSNET_NORM1,
+        MODEL_TENSOR.POSNET_NORM2,
+        MODEL_TENSOR.POSNET_ATTN_NORM,
+        MODEL_TENSOR.POSNET_ATTN_Q,
+        MODEL_TENSOR.POSNET_ATTN_K,
+        MODEL_TENSOR.POSNET_ATTN_V,
+        MODEL_TENSOR.POSNET_ATTN_OUT,
     ],
     # TODO
 }
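The net effect of the renames, as a quick sketch (plain str.format calls; "wavtokenizer-dec" is the arch string this architecture registers elsewhere in constants.py):

    # hparam keys move from flat "{arch}.posnet_length" style into nested namespaces:
    print("{arch}.posnet.embedding_length".format(arch="wavtokenizer-dec"))
    # -> wavtokenizer-dec.posnet.embedding_length

    # tensor base names drop the underscore:
    print("posnet.{bid}.attn_q".format(bid=0))  # -> posnet.0.attn_q
    print("convnext.{bid}.dw".format(bid=3))    # -> convnext.3.dw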

gguf-py/gguf/gguf_writer.py

Lines changed: 10 additions & 4 deletions

@@ -634,11 +634,17 @@ def add_embedding_length(self, length: int) -> None:
     def add_features_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
 
-    def add_posnet_length(self, length: int) -> None:
-        self.add_uint32(Keys.LLM.POSNET_LENGTH.format(arch=self.arch), length)
+    def add_posnet_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
-    def add_convnext_length(self, length: int) -> None:
-        self.add_uint32(Keys.LLM.CONVNEXT_LENGTH.format(arch=self.arch), length)
+    def add_posnet_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
+
+    def add_convnext_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_convnext_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
 
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
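Combined with the converter change above, writing the new metadata looks roughly like this (a sketch only: the output filename is ours, the values are the ones from this commit's TTS config, and GGUFWriter buffers the key-value pairs until the file is actually written):

    import gguf

    w = gguf.GGUFWriter("wavtokenizer-dec.gguf", "wavtokenizer-dec")
    w.add_posnet_embedding_length(768)    # wavtokenizer-dec.posnet.embedding_length
    w.add_posnet_block_count(6)           # wavtokenizer-dec.posnet.block_count
    w.add_convnext_embedding_length(768)  # wavtokenizer-dec.convnext.embedding_length
    w.add_convnext_block_count(12)        # wavtokenizer-dec.convnext.block_count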

gguf-py/gguf/tensor_mapping.py

Lines changed: 25 additions & 25 deletions

@@ -704,64 +704,64 @@ class TensorNameMap:
         ),
         #############################################################################
 
-        MODEL_TENSOR.CONV_NEXT_DW: (
+        MODEL_TENSOR.CONVNEXT_DW: (
             "backbone.convnext.{bid}.dwconv", # wavtokenizer
         ),
 
-        MODEL_TENSOR.CONV_NEXT_NORM: (
+        MODEL_TENSOR.CONVNEXT_NORM: (
             "backbone.convnext.{bid}.norm", # wavtokenizer
         ),
 
-        MODEL_TENSOR.CONV_NEXT_PW1: (
+        MODEL_TENSOR.CONVNEXT_PW1: (
            "backbone.convnext.{bid}.pwconv1", # wavtokenizer
         ),
 
-        MODEL_TENSOR.CONV_NEXT_PW2: (
+        MODEL_TENSOR.CONVNEXT_PW2: (
            "backbone.convnext.{bid}.pwconv2", # wavtokenizer
         ),
 
-        MODEL_TENSOR.CONV_NEXT_GAMMA: (
+        MODEL_TENSOR.CONVNEXT_GAMMA: (
            "backbone.convnext.{bid}.gamma", # wavtokenizer
         ),
 
-        MODEL_TENSOR.POS_NET_CONV1: (
-            "backbone.pos_net.{bid}.conv1", # wavtokenizer
+        MODEL_TENSOR.POSNET_CONV1: (
+            "backbone.posnet.{bid}.conv1", # wavtokenizer
         ),
 
-        MODEL_TENSOR.POS_NET_CONV2: (
-            "backbone.pos_net.{bid}.conv2", # wavtokenizer
+        MODEL_TENSOR.POSNET_CONV2: (
+            "backbone.posnet.{bid}.conv2", # wavtokenizer
         ),
 
-        MODEL_TENSOR.POS_NET_NORM: (
-            "backbone.pos_net.{bid}.norm", # wavtokenizer
+        MODEL_TENSOR.POSNET_NORM: (
+            "backbone.posnet.{bid}.norm", # wavtokenizer
        ),
 
-        MODEL_TENSOR.POS_NET_NORM1: (
-            "backbone.pos_net.{bid}.norm1", # wavtokenizer
+        MODEL_TENSOR.POSNET_NORM1: (
+            "backbone.posnet.{bid}.norm1", # wavtokenizer
        ),
 
-        MODEL_TENSOR.POS_NET_NORM2: (
-            "backbone.pos_net.{bid}.norm2", # wavtokenizer
+        MODEL_TENSOR.POSNET_NORM2: (
+            "backbone.posnet.{bid}.norm2", # wavtokenizer
        ),
 
-        MODEL_TENSOR.POS_NET_ATTN_NORM: (
-            "backbone.pos_net.{bid}.norm", # wavtokenizer
+        MODEL_TENSOR.POSNET_ATTN_NORM: (
+            "backbone.posnet.{bid}.norm", # wavtokenizer
        ),
 
-        MODEL_TENSOR.POS_NET_ATTN_Q: (
-            "backbone.pos_net.{bid}.q", # wavtokenizer
+        MODEL_TENSOR.POSNET_ATTN_Q: (
+            "backbone.posnet.{bid}.q", # wavtokenizer
        ),
 
-        MODEL_TENSOR.POS_NET_ATTN_K: (
-            "backbone.pos_net.{bid}.k", # wavtokenizer
+        MODEL_TENSOR.POSNET_ATTN_K: (
+            "backbone.posnet.{bid}.k", # wavtokenizer
        ),
 
-        MODEL_TENSOR.POS_NET_ATTN_V: (
-            "backbone.pos_net.{bid}.v", # wavtokenizer
+        MODEL_TENSOR.POSNET_ATTN_V: (
+            "backbone.posnet.{bid}.v", # wavtokenizer
        ),
 
-        MODEL_TENSOR.POS_NET_ATTN_OUT: (
-            "backbone.pos_net.{bid}.proj_out", # wavtokenizer
+        MODEL_TENSOR.POSNET_ATTN_OUT: (
+            "backbone.posnet.{bid}.proj_out", # wavtokenizer
        ),
     }
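A quick way to sanity-check the new mapping from gguf-py (a sketch: get_tensor_name_map and get_name are the existing gguf-py helpers, MODEL_ARCH.WAVTOKENIZER_DEC is assumed to be the arch these tensors belong to, and the block count is arbitrary):

    import gguf

    tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.WAVTOKENIZER_DEC, n_blocks=12)

    # checkpoint name -> canonical GGUF name (suffix is matched and re-appended)
    name = tmap.get_name("backbone.posnet.1.conv1.weight", try_suffixes=(".weight", ".bias"))
    print(name)  # -> posnet.1.conv1.weight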
