@@ -7757,9 +7757,9 @@ static bool llm_load_tensors(
                     }
                 } break;
             case LLM_ARCH_DECI:
-                {
+                {
                     model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
-
+
                     // output
                     model.output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
                     model.output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_NOT_REQUIRED);
@@ -11028,9 +11028,9 @@ struct llm_build_context {
     }

     struct ggml_cgraph * build_deci() {
-        struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
+        struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);

-        // mutable variable, needed during the last layer of the computation to skip unused tokens
+        // mutable variable, needed during the last layer of the computation to skip unused tokens
         int32_t n_tokens = this->n_tokens;

         const int64_t n_embd_head = hparams.n_embd_head_v;
@@ -11054,9 +11054,9 @@ struct llm_build_context {
             const int64_t n_head_kv = hparams.n_head_kv(il);
             const int64_t n_head = hparams.n_head(il);

-            if (n_head == 0) // attention-free layer of Llama-3_1-Nemotron-51B
+            if (n_head == 0) { // attention-free layer of Llama-3_1-Nemotron-51B
                 cur = inpL;
-            else {
+            } else {
                 // norm
                 cur = llm_build_norm(ctx0, inpL, hparams,
                         model.layers[il].attn_norm, NULL,
@@ -11067,9 +11067,8 @@ struct llm_build_context {
             if (n_head > 0 && n_head_kv == 0) { // "linear attention" of Llama-3_1-Nemotron-51B
                 cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wo, cur);
                 cb(cur, "wo", il);
-            } else if (n_head > 0)
+            } else if (n_head > 0) {
                 // self-attention
-            {
                 // rope freq factors for llama3; may return nullptr for llama2 and other models
                 struct ggml_tensor * rope_factors = build_rope_factors(il);

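For reference, the last two hunks close out the per-layer dispatch in build_deci(): a layer with n_head == 0 is an attention-free layer of Llama-3_1-Nemotron-51B (the input passes straight through), n_head > 0 with n_head_kv == 0 is the model's "linear attention" (only the wo projection is applied), and any other layer runs regular self-attention. Below is a minimal, self-contained C++ sketch of that classification only; LayerHeads and the sample head counts are hypothetical stand-ins for what llama.cpp reads via hparams.n_head(il) and hparams.n_head_kv(il), not the real structures.

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for the per-layer head counts that
// llama.cpp obtains from hparams.n_head(il) / hparams.n_head_kv(il).
struct LayerHeads {
    int64_t n_head;
    int64_t n_head_kv;
};

// Mirrors the branch structure fixed in the diff above:
//   n_head == 0                  -> attention-free layer (cur = inpL)
//   n_head > 0 && n_head_kv == 0 -> "linear attention" (only the wo projection)
//   otherwise                    -> regular self-attention
static const char * classify_layer(const LayerHeads & h) {
    if (h.n_head == 0) {
        return "attention-free";
    } else if (h.n_head > 0 && h.n_head_kv == 0) {
        return "linear attention";
    } else {
        return "self-attention";
    }
}

int main() {
    // Illustrative values only, not taken from a real Nemotron-51B GGUF.
    std::vector<LayerHeads> layers = {
        {64, 8}, {64, 0}, {0, 0}, {64, 8},
    };
    for (size_t il = 0; il < layers.size(); ++il) {
        std::printf("layer %zu: %s\n", il, classify_layer(layers[il]));
    }
    return 0;
}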