@@ -7763,7 +7763,7 @@ static bool llm_load_tensors(
                     // output
                     model.output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
                     model.output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_NOT_REQUIRED);
-
+
                     // if output is NULL, init from the input tok embed
                     if (model.output == NULL) {
                         model.output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_DUPLICATED);
@@ -11029,19 +11029,19 @@ struct llm_build_context {
 
     struct ggml_cgraph * build_deci() {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
-
+
         // mutable variable, needed during the last layer of the computation to skip unused tokens
         int32_t n_tokens = this->n_tokens;
-
+
         const int64_t n_embd_head = hparams.n_embd_head_v;
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
         GGML_ASSERT(n_embd_head == hparams.n_rot);
-
+
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
 
         inpL = llm_build_inp_embd(ctx0, lctx, hparams, ubatch, model.tok_embd, cb);
-
+
         // inp_pos - contains the positions
         struct ggml_tensor * inp_pos = build_inp_pos();