@@ -8368,7 +8368,7 @@ static bool llm_load_tensors(
                 const int time_decay_extra_dim = (n_embd == 4096) ? 128 : 64;
                 const int head_size = hparams.wkv_head_size;
                 const int attn_hidden_size = n_embd;
-                const int ffn_size = (int)(n_embd * 3.5 / 32) * 32;
+                const int ffn_size = hparams.n_ff_arr[0];
 
                 for (int i = 0; i < n_layer; ++i) {
                     ggml_context * ctx_layer = ctx_for_layer(i);
@@ -8391,7 +8391,6 @@ static bool llm_load_tensors(
                     layer.time_mix_lerp_r = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_TIME_MIX_LERP_R, "weight", i), {n_embd, 1, 1});
                     layer.time_mix_lerp_g = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_TIME_MIX_LERP_G, "weight", i), {n_embd, 1, 1});
 
-                    // TODO: Parametrize hardcoded dimensions for first & decay
                     layer.time_mix_first = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_TIME_MIX_FIRST, "weight", i), {head_size, n_embd / head_size});
                     layer.time_mix_decay = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_TIME_MIX_DECAY, "weight", i), {n_embd});
                     layer.time_mix_decay_w1 = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_TIME_MIX_DECAY_W1, "weight", i), {n_embd, time_decay_extra_dim});
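
For context on the ffn_size change: the removed expression hardcodes the stock RWKV-6 convention of a feed-forward width of 3.5 × n_embd rounded down to a multiple of 32, while `hparams.n_ff_arr[0]` is read from the converted model's GGUF metadata, so checkpoints whose FFN width does not follow that rule still load with correctly sized tensors. A minimal standalone sketch of the two derivations (the concrete values are illustrative, not taken from this PR):

```cpp
#include <cstdio>

// Old heuristic removed by this diff: 3.5 * n_embd, rounded down to a multiple of 32.
static int ffn_size_heuristic(int n_embd) {
    return (int)(n_embd * 3.5 / 32) * 32;
}

int main() {
    // Hypothetical example values; in llama.cpp the second number would come
    // from the per-layer feed_forward_length stored in the GGUF header.
    const int n_embd         = 2048;
    const int n_ff_from_gguf = 7168;  // a stock RWKV-6 model: matches the heuristic

    // 2048 * 3.5 = 7168, already a multiple of 32, so both agree here.
    printf("heuristic = %d, metadata = %d\n", ffn_size_heuristic(n_embd), n_ff_from_gguf);

    // A variant trained with a non-standard FFN width would diverge from the
    // heuristic and mis-size its tensors, but loads correctly from metadata.
    return 0;
}
```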