@@ -2476,7 +2476,7 @@ static void llm_load_tensors(
                    }
                } break;
            case LLM_ARCH_PERSIMMON:
-                {
+                {
                    model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
                    model.output_norm    = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, GGML_BACKEND_CPU);
                    model.output_norm_b  = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, GGML_BACKEND_CPU);
@@ -4162,8 +4162,8 @@ static struct ggml_cgraph * llm_build_persimmon(
            cur = ggml_add(ctx0, cur, model.layers[il].attn_norm_b);
            offload_func(cur);
            ggml_format_name(cur, "input_layernorm_%d", il);
-        }
-        // self attention
+        }
+        // self attention
        {
            cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, cur);
            offload_func_kq(cur);
@@ -4206,7 +4206,7 @@ static struct ggml_cgraph * llm_build_persimmon(
            offload_func_v(tmpk);
            tmpk = ggml_add(ctx0, tmpk, model.layers[il].attn_k_norm_b);
            offload_func_v(tmpk);
-
+
            // RoPE the first n_rot of q/k, pass the other half, and concat.
            struct ggml_tensor * qrot = ggml_view_3d(
                ctx0, tmpq, n_rot, n_head, n_tokens,
@@ -4227,7 +4227,7 @@ static struct ggml_cgraph * llm_build_persimmon(

            // get the second half of tmpq, e.g tmpq[n_rot:, :, :]
            struct ggml_tensor * qpass = ggml_view_3d(
-                ctx0, tmpq, n_rot, n_head, n_tokens,
+                ctx0, tmpq, n_rot, n_head, n_tokens,
                ggml_element_size(tmpq) * n_embd_head,
                ggml_element_size(tmpq) * n_embd_head * n_head,
                ggml_element_size(tmpq) * n_rot
@@ -4328,9 +4328,9 @@ static struct ggml_cgraph * llm_build_persimmon(
            offload_func_kq(KQ_soft_max);
            ggml_set_name(KQ_soft_max, "KQ_soft_max");

-            struct ggml_tensor * V =
+            struct ggml_tensor * V =
                ggml_view_3d(ctx0, kv_self.v,
-                    n_kv, n_embd_head, n_head_kv,
+                    n_kv, n_embd_head, n_head_kv,
                    ggml_element_size(kv_self.v)*n_ctx,
                    ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
                    ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
@@ -4361,11 +4361,11 @@ static struct ggml_cgraph * llm_build_persimmon(
        ggml_set_name(inpFF, "inpFF");
        {
            // MLP
-            {
+            {
                // Norm
                cur = ggml_norm(ctx0, inpFF, norm_eps);
                offload_func(cur);
-                cur = ggml_add(ctx0,
+                cur = ggml_add(ctx0,
                    ggml_mul(ctx0, cur, model.layers[il].ffn_norm),
                    model.layers[il].ffn_norm_b
                );
@@ -4386,7 +4386,7 @@ static struct ggml_cgraph * llm_build_persimmon(

            cur = ggml_mul_mat(ctx0, model.layers[il].w2, cur);
            offload_func(cur);
-            cur = ggml_add(ctx0,
+            cur = ggml_add(ctx0,
                cur,
                model.layers[il].b2);
            offload_func(cur);
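
Note on the view arithmetic in the hunks above: Persimmon applies RoPE only to the first n_rot values of each head, so tmpq is split into 3-D views that share the same strides (n_embd_head elements between heads, n_embd_head * n_head elements between tokens); qpass simply starts at a byte offset of n_rot elements into each head, per the ggml_view_3d call in the hunk at 4229. The following is a minimal standalone C sketch of that offset/stride arithmetic, with made-up toy sizes and no ggml dependency; it is an illustration, not part of this commit.

    #include <stdio.h>
    #include <stddef.h>

    /* Toy illustration of the qrot/qpass split used in llm_build_persimmon:
     * a [n_embd_head x n_head x n_tokens] buffer is viewed twice with the same
     * strides; only the starting offset differs by n_rot elements. */
    int main(void) {
        enum { n_embd_head = 8, n_rot = 4, n_head = 2, n_tokens = 1 };
        float q[n_tokens][n_head][n_embd_head];

        /* fill with recognizable values: 100*head + index-within-head */
        for (int t = 0; t < n_tokens; ++t)
            for (int h = 0; h < n_head; ++h)
                for (int i = 0; i < n_embd_head; ++i)
                    q[t][h][i] = 100.0f*h + i;

        const float * base = &q[0][0][0];
        const size_t nb1 = n_embd_head;          /* elements between heads,  cf. ggml_element_size(tmpq)*n_embd_head        */
        const size_t nb2 = n_embd_head * n_head; /* elements between tokens, cf. ggml_element_size(tmpq)*n_embd_head*n_head */

        for (int t = 0; t < n_tokens; ++t) {
            for (int h = 0; h < n_head; ++h) {
                const float * qrot  = base + t*nb2 + h*nb1;         /* first n_rot values of the head */
                const float * qpass = base + t*nb2 + h*nb1 + n_rot; /* remaining values, cf. offset ggml_element_size(tmpq)*n_rot */
                printf("t=%d h=%d  qrot = %g..%g  qpass = %g..%g\n",
                       t, h, qrot[0], qrot[n_rot-1], qpass[0], qpass[n_rot-1]);
            }
        }
        return 0;
    }

With n_rot == n_embd_head / 2 as in this toy setup, qrot covers elements 0..3 of each head (the rotated half) and qpass elements 4..7 (the pass-through half), which is the split the two ggml_view_3d calls above express in bytes.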