
Commit 1a844be

convert : support rope_scaling type and rope_type (#13349)
1 parent: 0ccc121

1 file changed: convert_hf_to_gguf.py (58 additions, 57 deletions)
@@ -1388,10 +1388,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_file_type(self.ftype)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         head_count = self.hparams["num_attention_heads"]
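Every hunk in this commit applies the same pattern: newer Hugging Face configs record the scaling kind under rope_scaling["rope_type"], older ones under rope_scaling["type"], and the whole block may also be null or absent. The standalone sketch below (hypothetical config dicts, not taken from the converter) shows how the `or {}` default plus the two-key lookup covers all four cases.

# Standalone sketch of the lookup pattern used throughout this commit; the
# config dicts below are hypothetical examples, not real model configs.
example_configs = [
    {"rope_scaling": {"rope_type": "linear", "factor": 2.0}},  # newer key name
    {"rope_scaling": {"type": "linear", "factor": 4.0}},       # older key name
    {"rope_scaling": None},                                    # explicit null in config.json
    {},                                                        # key absent entirely
]

for hparams in example_configs:
    rope_scaling = hparams.get("rope_scaling") or {}  # None and "missing" both become {}
    kind = rope_scaling.get("rope_type", rope_scaling.get("type"))
    if kind == "linear" and "factor" in rope_scaling:
        print("linear rope scaling, factor =", rope_scaling["factor"])
    else:
        print("no linear rope scaling metadata; nothing is written")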
@@ -1512,10 +1512,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_file_type(self.ftype)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -1828,10 +1828,10 @@ def set_gguf_parameters(self):
         rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
@@ -2206,10 +2206,10 @@ def set_gguf_parameters(self):
         rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
@@ -2449,10 +2449,10 @@ def set_gguf_parameters(self):
         logit_scale = self.hparams["hidden_size"] / self.hparams["dim_model_base"]
         self.gguf_writer.add_logit_scale(logit_scale)
         logger.info(f"gguf: (minicpm) logit_scale = {logit_scale}")
-        if self.hparams.get("rope_scaling") is not None:
-            if self.hparams["rope_scaling"].get("type") == "longrope":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LONGROPE)
-                logger.info(f"gguf: (minicpm) rope_scaling_type = {gguf.RopeScalingType.LONGROPE}")
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "longrope":
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LONGROPE)
+            logger.info(f"gguf: (minicpm) rope_scaling_type = {gguf.RopeScalingType.LONGROPE}")
 
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         rope_dims = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
@@ -2597,11 +2597,11 @@ def set_vocab(self):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         self._try_set_pooling_type()
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if self.hf_arch == "Qwen2Model":
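The YaRN branches also record the pre-scaling context window. Below is a self-contained sketch with a stub writer and hypothetical config values; the real code calls the same three methods on gguf.GGUFWriter and passes gguf.RopeScalingType.YARN rather than a plain string.

# Stub writer that records calls instead of writing a GGUF file; the YaRN
# config values below are hypothetical and used purely for illustration.
class StubWriter:
    def __init__(self) -> None:
        self.calls: list[tuple[str, object]] = []

    def add_rope_scaling_type(self, value) -> None:
        self.calls.append(("rope_scaling_type", value))

    def add_rope_scaling_factor(self, value) -> None:
        self.calls.append(("rope_scaling_factor", value))

    def add_rope_scaling_orig_ctx_len(self, value) -> None:
        self.calls.append(("rope_scaling_orig_ctx_len", value))


hparams = {"rope_scaling": {"rope_type": "yarn", "factor": 4.0,
                            "original_max_position_embeddings": 32768}}  # hypothetical

writer = StubWriter()
rope_scaling = hparams.get("rope_scaling") or {}
if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
    writer.add_rope_scaling_type("yarn")  # the converter passes gguf.RopeScalingType.YARN
    writer.add_rope_scaling_factor(rope_scaling["factor"])
    writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])

print(writer.calls)
# [('rope_scaling_type', 'yarn'), ('rope_scaling_factor', 4.0), ('rope_scaling_orig_ctx_len', 32768)]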
@@ -2763,11 +2763,11 @@ def set_gguf_parameters(self):
         logger.info(f"gguf: expert shared feed forward length = {shared_expert_intermediate_size}")
         # YaRN is not enabled by default
         # To enable it, please refer to this guide: https://huggingface.co/Qwen/Qwen3-30B-A3B#processing-long-texts
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
     _experts: list[dict[str, Tensor]] | None = None
 
@@ -3035,7 +3035,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
 
         scale = max_pos_embds / orig_max_pos_embds
 
-        rope_scaling_type = rope_scaling.get('type', '').lower()
+        rope_scaling_type = rope_scaling.get('rope_type', rope_scaling.get('type', '')).lower()
         if len(rope_scaling_type) == 0:
             raise KeyError('Missing the required key rope_scaling.type')
 
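Unlike the other call sites, this one lowercases the resulting string and treats a missing type as an error rather than silently skipping. A small sketch of the three possible outcomes, with hypothetical input dicts:

# Sketch of the lowercased, required lookup in the hunk above; the sample
# dicts are hypothetical and only illustrate the three possible outcomes.
for rope_scaling in ({"rope_type": "LongRoPE"}, {"type": "longrope"}, {}):
    rope_scaling_type = rope_scaling.get('rope_type', rope_scaling.get('type', '')).lower()
    if len(rope_scaling_type) == 0:
        print("raises KeyError('Missing the required key rope_scaling.type')")
    else:
        print("rope_scaling_type =", rope_scaling_type)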

@@ -3347,10 +3347,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_file_type(self.ftype)
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
@@ -3425,10 +3425,10 @@ def set_gguf_parameters(self):
         rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear" or self.hparams["rope_scaling"].get("rope_type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         n_head = self.hparams["num_attention_heads"]
@@ -4866,12 +4866,12 @@ def set_gguf_parameters(self):
 
         self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
-                self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * hparams["rope_scaling"]["mscale_all_dim"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+            self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * rope_scaling["mscale_all_dim"])
 
     _experts: list[dict[str, Tensor]] | None = None
 
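This variant additionally derives the YaRN log multiplier from the config's mscale_all_dim field. A short worked example with a hypothetical rope_scaling block:

# Hypothetical rope_scaling block; only the log-multiplier arithmetic is shown.
# With mscale_all_dim = 1.0 the stored value is 0.1 * 1.0 = 0.1.
rope_scaling = {"rope_type": "yarn", "factor": 40.0,
                "original_max_position_embeddings": 4096, "mscale_all_dim": 1.0}
yarn_log_mul = 0.1 * rope_scaling["mscale_all_dim"]
print(yarn_log_mul)  # 0.1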

@@ -5363,11 +5363,11 @@ def set_gguf_parameters(self):
         super().set_gguf_parameters()
         rope_dim = self.hparams["head_dim"]
         self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
 
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
@@ -5600,10 +5600,10 @@ def set_gguf_parameters(self):
         rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"], optional=True)
         rotary_factor = rotary_factor if rotary_factor is not None else 1.0
         self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
-        if hparams.get("rope_scaling") is not None and "factor" in hparams["rope_scaling"]:
-            if hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
@@ -5706,10 +5706,11 @@ def set_gguf_parameters(self):
         rope_dim = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"]
 
         self.gguf_writer.add_rope_dimension_count(rope_dim)
-        if (self.hparams.get("rope_scaling") or {}).get("type") == "yarn" and "factor" in self.hparams["rope_scaling"]:
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-            self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-            self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
         else:
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
         self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
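This last call site differs from the earlier ones in its fallback: when no recognized YaRN block is present it still writes an explicit RopeScalingType.NONE rather than omitting the field. A quick sketch of the two paths, again with hypothetical configs and plain strings standing in for the gguf enum values:

# Hypothetical configs showing the two paths of the hunk above: a recognized
# YaRN block yields "yarn", anything else yields an explicit "none" record.
def scaling_type_written(hparams: dict) -> str:
    rope_scaling = hparams.get("rope_scaling") or {}
    if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
        return "yarn"   # plus factor and original context length in the real code
    return "none"       # gguf.RopeScalingType.NONE in the real code

print(scaling_type_written({"rope_scaling": {"rope_type": "yarn", "factor": 2.0,
                                             "original_max_position_embeddings": 4096}}))  # yarn
print(scaling_type_written({}))                                                            # none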
