Commit b6c9e70

tts : fix tensor shapes
1 parent 609f17d commit b6c9e70

6 files changed: 101 additions, 75 deletions

convert_hf_to_gguf.py

Lines changed: 4 additions & 0 deletions
@@ -326,6 +326,8 @@ def prepare_tensors(self):
                             gguf.MODEL_TENSOR.TIME_MIX_W2,
                             gguf.MODEL_TENSOR.TIME_MIX_DECAY_W1,
                             gguf.MODEL_TENSOR.TIME_MIX_DECAY_W2,
+                            gguf.MODEL_TENSOR.POS_NET_NORM1,
+                            gguf.MODEL_TENSOR.POS_NET_NORM2,
                         )
                     )
                     or not new_name.endswith(".weight")

@@ -2060,6 +2062,8 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_posnet_length      (self.hparams["n_embd_posnet"])
         self.gguf_writer.add_convnext_length    (self.hparams["n_embd_convnext"])
         self.gguf_writer.add_feed_forward_length(self.hparams["n_ff"])
+        self.gguf_writer.add_group_norm_eps     (self.hparams["group_norm_epsilon"])
+        self.gguf_writer.add_group_norm_groups  (self.hparams["group_norm_groups"])


 @Model.register("Qwen2MoeForCausalLM")

examples/tts/convert_pt_to_hf.py

Lines changed: 9 additions & 0 deletions
@@ -98,6 +98,13 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
         if new_key.endswith("gamma"):
             new_key = new_key.replace("gamma", "gamma.weight")

+        # convert from 1D [768] to 2D [768, 1] so that ggml_add can broadcast the bias
+        if (new_key.endswith("norm.weight") or new_key.endswith("norm1.weight") or new_key.endswith("norm2.weight") or new_key.endswith(".bias")) and (new_key.startswith("backbone.pos_net") or new_key.startswith("backbone.embed.bias")):
+            value = value.unsqueeze(1)
+
+        if new_key.endswith("dwconv.bias"):
+            value = value.unsqueeze(1)
+
         size_mb = value.element_size() * value.nelement() / (1024 * 1024)
         print(f"{size_mb:8.2f} MB - {new_key}: {value.shape}")
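To make the shape comment above concrete, here is a small stand-alone PyTorch analogue of the broadcast the unsqueeze enables. This is only an illustration, not the ggml code path: the 768-channel count comes from the comment in the diff, while the 120-frame length is an arbitrary placeholder.

import torch

bias = torch.zeros(768)          # 1D bias as stored in the original checkpoint
act  = torch.randn(768, 120)     # per-channel activations over 120 frames (placeholder length)

# act + bias would fail: broadcasting aligns trailing dimensions, and 768 != 120.
# After unsqueeze(1) the bias is [768, 1] and broadcasts over the frame axis,
# which mirrors why the converter stores these tensors with an explicit second dimension.
out = act + bias.unsqueeze(1)
print(out.shape)                 # torch.Size([768, 120])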

@@ -154,6 +161,8 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
     "vocab_size": 4096,
     "n_head": 1,
     "layer_norm_epsilon": 1e-6,
+    "group_norm_epsilon": 1e-6,
+    "group_norm_groups": 32,
     "max_position_embeddings": 8192, # ?
     "num_hidden_layers": 12
 }
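For reference, the two new entries describe a standard group-normalization layer. A minimal sketch of the PyTorch module they correspond to; the 768-channel count is an assumption carried over from the shape comment earlier in this file, not a value read from the checkpoint.

import torch

# num_groups and eps match the config entries added above; 768 channels is assumed
norm = torch.nn.GroupNorm(num_groups=32, num_channels=768, eps=1e-6)

x = torch.randn(1, 768, 120)     # (batch, channels, frames); frame count is arbitrary
print(norm(x).shape)             # torch.Size([1, 768, 120])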

gguf-py/gguf/constants.py

Lines changed: 2 additions & 0 deletions
@@ -125,6 +125,8 @@ class Attention:
         VALUE_LENGTH      = "{arch}.attention.value_length"
         LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
         LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
+        GROUPNORM_EPS     = "{arch}.attention.group_norm_epsilon"
+        GROUPNORM_GROUPS  = "{arch}.attention.group_norm_groups"
         CAUSAL            = "{arch}.attention.causal"
         Q_LORA_RANK       = "{arch}.attention.q_lora_rank"
         KV_LORA_RANK      = "{arch}.attention.kv_lora_rank"
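Like the existing attention keys, both new constants are {arch}-templated, so each architecture gets its own metadata key. A tiny illustration with a placeholder architecture name (not taken from this commit):

GROUPNORM_EPS    = "{arch}.attention.group_norm_epsilon"
GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"

# placeholder arch string, purely to show the expansion
print(GROUPNORM_EPS.format(arch="demo-arch"))     # demo-arch.attention.group_norm_epsilon
print(GROUPNORM_GROUPS.format(arch="demo-arch"))  # demo-arch.attention.group_norm_groups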

gguf-py/gguf/gguf_writer.py

Lines changed: 6 additions & 3 deletions
@@ -631,9 +631,6 @@ def add_context_length(self, length: int) -> None:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)

-    def add_embedding_length(self, length: int) -> None:
-        self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
-
     def add_features_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)

@@ -739,6 +736,12 @@ def add_layer_norm_eps(self, value: float) -> None:
     def add_layer_norm_rms_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)

+    def add_group_norm_eps(self, value: float) -> None:
+        self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
+
+    def add_group_norm_groups(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
+
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
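A minimal sketch of how the two new writer methods could be exercised in isolation; the architecture string and file name below are placeholders chosen for illustration, not values from this commit.

import gguf

# placeholder arch and output path; only the two new metadata calls matter here
writer = gguf.GGUFWriter("groupnorm-demo.gguf", arch="demo-arch")

writer.add_group_norm_eps(1e-6)   # stored as float32 under "demo-arch.attention.group_norm_epsilon"
writer.add_group_norm_groups(32)  # stored as uint32  under "demo-arch.attention.group_norm_groups"

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()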

gguf-py/tests/test_quants.py

Lines changed: 1 addition & 1 deletion
@@ -136,7 +136,7 @@ def compare_tensors(t1: np.ndarray, t2: np.ndarray, qtype: GGMLQuantizationType)
             logger.debug(f"Sample bad block ({diff_bits[bad_block_id]} differing bits):\n{t1[bad_block_id]}\nReference:\n{t2[bad_block_id]}")

         sum_diff_bits = np.sum(diff_bits)
-        logger.debug(f"{sum_diff_bits} bits differ ({100 * sum_diff_bits/(x.size * 8):.6f}%)")
+        logger.debug(f"{sum_diff_bits} bits differ ({100 * sum_diff_bits / (x.size * 8):.6f}%)")
         return False