Skip to content

Commit 1e0c15e

Browse files
nyxkragedsx1986
authored and committed
llama : fix llama3.1 rope_freqs not respecting custom head_dim (ggml-org#9141)
* fix: llama3.1 rope_freqs not respecting custom head_dim
* fix: use potential head_dim for Exaone
1 parent f9e8109 commit 1e0c15e

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

convert_hf_to_gguf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,7 +1572,7 @@ def prepare_tensors(self):
15721572
if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
15731573
if rope_scaling.get("rope_type", '').lower() == "llama3":
15741574
base = self.hparams.get("rope_theta", 10000.0)
1575-
dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
1575+
dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
15761576
freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
15771577

15781578
factor = rope_scaling.get("factor", 8.0)
@@ -3820,7 +3820,7 @@ def prepare_tensors(self):
38203820
if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
38213821
if rope_scaling.get("rope_type", '').lower() == "llama3":
38223822
base = self.hparams.get("rope_theta", 10000.0)
3823-
dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
3823+
dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
38243824
freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
38253825

38263826
factor = rope_scaling.get("factor", 8.0)

src/llama.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6605,6 +6605,7 @@ static bool llm_load_tensors(
66056605
const int64_t n_embd_gqa = n_embd_v_gqa;
66066606
const int64_t n_vocab = hparams.n_vocab;
66076607
const int64_t n_vocab_type = hparams.n_vocab_type;
6608+
const int64_t n_rot = hparams.n_rot;
66086609
const int64_t n_expert = hparams.n_expert;
66096610
const int64_t n_expert_used = hparams.n_expert_used;
66106611
const int64_t n_ctx_train = hparams.n_ctx_train;
@@ -6662,7 +6663,7 @@ static bool llm_load_tensors(
66626663

66636664
layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
66646665

6665-
layer.rope_freqs = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ROPE_FREQS, "weight"), {n_embd/n_head/2}, llama_model_loader::TENSOR_NOT_REQUIRED | (i != 0 ? llama_model_loader::TENSOR_DUPLICATED : 0));
6666+
layer.rope_freqs = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ROPE_FREQS, "weight"), {n_rot/2}, llama_model_loader::TENSOR_NOT_REQUIRED | (i != 0 ? llama_model_loader::TENSOR_DUPLICATED : 0));
66666667

66676668
if (n_expert == 0) {
66686669
layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
@@ -8193,7 +8194,7 @@ static bool llm_load_tensors(
81938194
layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd});
81948195

81958196
layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd});
8196-
layer.rope_freqs = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ROPE_FREQS, "weight"), {n_embd/n_head/2}, llama_model_loader::TENSOR_NOT_REQUIRED | (i != 0 ? llama_model_loader::TENSOR_DUPLICATED : 0));
8197+
layer.rope_freqs = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ROPE_FREQS, "weight"), {n_rot/2}, llama_model_loader::TENSOR_NOT_REQUIRED | (i != 0 ? llama_model_loader::TENSOR_DUPLICATED : 0));
81978198
layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
81988199
layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd});
81998200
layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});

0 commit comments

Comments (0)