@@ -165,12 +165,16 @@ class llama_token_data_array(Structure):
# int32_t n_gpu_layers; // number of layers to store in VRAM
# int32_t main_gpu; // the GPU that is used for scratch and small tensors
# float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+
+ # // ref: https://github.com/ggerganov/llama.cpp/pull/2054
+ # float rope_freq_base; // RoPE base frequency
+ # float rope_freq_scale; // RoPE frequency scaling factor
+
# // called with a progress value between 0 and 1, pass NULL to disable
# llama_progress_callback progress_callback;
# // context pointer passed to the progress callback
# void * progress_callback_user_data;
-
# // Keep the booleans together to avoid misalignment during copy-by-value.
# bool low_vram; // if true, reduce VRAM usage at the cost of performance
# bool f16_kv; // use fp16 for KV cache
@@ -188,6 +192,8 @@ class llama_context_params(Structure):
("n_gpu_layers", c_int32),
("main_gpu", c_int32),
("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+ ("rope_freq_base", c_float),
+ ("rope_freq_scale", c_float),
("progress_callback", llama_progress_callback),
("progress_callback_user_data", c_void_p),
("low_vram", c_bool),