Commit b83728a

Update llama.cpp
1 parent a4fe3fe commit b83728a

2 files changed: +5 −9 lines changed

llama_cpp/llama_cpp.py

Lines changed: 4 additions & 8 deletions
@@ -164,7 +164,7 @@ class llama_token_data_array(Structure):
 # int32_t n_batch; // prompt processing batch size
 # int32_t n_gpu_layers; // number of layers to store in VRAM
 # int32_t main_gpu; // the GPU that is used for scratch and small tensors
-# float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+# const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)

 # // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 # float rope_freq_base; // RoPE base frequency
@@ -192,7 +192,7 @@ class llama_context_params(Structure):
         ("n_batch", c_int32),
         ("n_gpu_layers", c_int32),
         ("main_gpu", c_int32),
-        ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("tensor_split", POINTER(c_float)),
         ("rope_freq_base", c_float),
         ("rope_freq_scale", c_float),
         ("progress_callback", llama_progress_callback),
@@ -933,22 +933,19 @@ def llama_sample_frequency_and_presence_penalties(
 # /// @param candidates A vector of `llama_token_data` containing the candidate tokens, the logits must be directly extracted from the original generation context without being sorted.
 # /// @params guidance_ctx A separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
 # /// @params scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance.
-# /// @params smooth_factor Smooth factor between guidance logits and original logits. 1.0f means only use guidance logits. 0.0f means only original logits.
 # LLAMA_API void llama_sample_classifier_free_guidance(
 #     struct llama_context * ctx,
 #     llama_token_data_array * candidates,
 #     struct llama_context * guidance_ctx,
-#     float scale,
-#     float smooth_factor);
+#     float scale);
 def llama_sample_classifier_free_guidance(
     ctx: llama_context_p,
     candidates,  # type: _Pointer[llama_token_data_array]
     guidance_ctx: llama_context_p,
     scale: c_float,
-    smooth_factor: c_float,
 ):
     return _lib.llama_sample_classifier_free_guidance(
-        ctx, candidates, guidance_ctx, scale, smooth_factor
+        ctx, candidates, guidance_ctx, scale
     )


@@ -957,7 +954,6 @@ def llama_sample_classifier_free_guidance(
     llama_token_data_array_p,
     llama_context_p,
     c_float,
-    c_float,
 ]
 _lib.llama_sample_classifier_free_guidance.restype = None
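With smooth_factor removed, the wrapper now takes exactly four arguments, matching the updated upstream C signature. A hypothetical call-site sketch (ctx, candidates, and guidance_ctx stand in for objects created elsewhere in a program; the helper name is made up for illustration):

from ctypes import c_float

from llama_cpp.llama_cpp import llama_sample_classifier_free_guidance

def apply_cfg(ctx, candidates, guidance_ctx, scale: float = 1.5):
    # `candidates` is a _Pointer[llama_token_data_array] holding unsorted
    # logits from the main context; `guidance_ctx` is a separate context
    # seeded with the negative prompt, per the docstring above.
    llama_sample_classifier_free_guidance(
        ctx,
        candidates,
        guidance_ctx,
        c_float(scale),  # 1.0 means no guidance; higher means stronger
    )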

vendor/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule pointer updated to the llama.cpp revision that the binding changes above track)
