Skip to content

Commit d8a3ddb

Browse files
committed
Update llama.cpp
1 parent 985d559 commit d8a3ddb

File tree

2 files changed

+3
-1
lines changed

2 files changed

+3
-1
lines changed

llama_cpp/llama_cpp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ class llama_token_data_array(Structure):
163163
# int32_t n_ctx; // text context
164164
# int32_t n_batch; // prompt processing batch size
165165
# int32_t n_gqa; // grouped-query attention (TEMP - will be moved to model hparams)
166+
# float rms_norm_eps; // rms norm epsilon (TEMP - will be moved to model hparams)
166167
# int32_t n_gpu_layers; // number of layers to store in VRAM
167168
# int32_t main_gpu; // the GPU that is used for scratch and small tensors
168169
#
@@ -193,6 +194,7 @@ class llama_context_params(Structure):
193194
("n_ctx", c_int32),
194195
("n_batch", c_int32),
195196
("n_gqa", c_int32),
197+
("rms_norm_eps", c_float),
196198
("n_gpu_layers", c_int32),
197199
("main_gpu", c_int32),
198200
("tensor_split", POINTER(c_float)),

vendor/llama.cpp

0 commit comments

Comments (0)