Commit f72b6e9

Update llama.cpp
1 parent 15e0e0a commit f72b6e9

File tree

2 files changed: +8 −2 lines

llama_cpp/llama_cpp.py

Lines changed: 7 additions & 1 deletion
@@ -165,12 +165,16 @@ class llama_token_data_array(Structure):
 #     int32_t n_gpu_layers; // number of layers to store in VRAM
 #     int32_t main_gpu; // the GPU that is used for scratch and small tensors
 #     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+
+#     // ref: https://github.com/ggerganov/llama.cpp/pull/2054
+#     float rope_freq_base; // RoPE base frequency
+#     float rope_freq_scale; // RoPE frequency scaling factor
+
 #     // called with a progress value between 0 and 1, pass NULL to disable
 #     llama_progress_callback progress_callback;
 #     // context pointer passed to the progress callback
 #     void * progress_callback_user_data;
 
-
 #     // Keep the booleans together to avoid misalignment during copy-by-value.
 #     bool low_vram; // if true, reduce VRAM usage at the cost of performance
 #     bool f16_kv; // use fp16 for KV cache
@@ -188,6 +192,8 @@ class llama_context_params(Structure):
         ("n_gpu_layers", c_int32),
         ("main_gpu", c_int32),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("rope_freq_base", c_float),
+        ("rope_freq_scale", c_float),
         ("progress_callback", llama_progress_callback),
         ("progress_callback_user_data", c_void_p),
         ("low_vram", c_bool),

vendor/llama.cpp (git submodule pointer updated; accounts for the remaining +1 −1)
