Skip to content

Commit 4cae85c

Browse files
Changes in llama.cpp API
ggml-org/llama.cpp#11110
1 parent 484b3df commit 4cae85c

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

app/src/main/cpp/hips.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -268,9 +268,12 @@ extern "C" JNIEXPORT jboolean JNICALL Java_org_vonderheidt_hips_utils_LlamaCpp_i
268268
// Get model the context was created with
269269
const llama_model* model = llama_get_model(cppCtx);
270270

271+
// Get vocabulary of the model
272+
const llama_vocab* vocab = llama_model_get_vocab(model);
273+
271274
// Check if token is special
272275
// Token ID doesn't need casting because jint and llama_token are both just int32_t
273-
bool cppIsSpecial = llama_token_is_eog(model, token) || llama_token_is_control(model,token);
276+
bool cppIsSpecial = llama_vocab_is_eog(vocab, token) || llama_vocab_is_control(vocab,token);
274277

275278
// Cast boolean to return it
276279
// static_cast because casting booleans is type safe, unlike reinterpret_cast for casting C++ pointers to Java long
@@ -298,14 +301,17 @@ extern "C" JNIEXPORT jobjectArray JNICALL Java_org_vonderheidt_hips_utils_LlamaC
298301
// No need to specify cppModel in variable name as there is no jModel
299302
const llama_model* model = llama_get_model(cppCtx);
300303

304+
// Get vocabulary of the model
305+
const llama_vocab* vocab = llama_model_get_vocab(model);
306+
301307
// Copy token IDs from Java array to C++ array
302308
// Data types jint, jsize and int32_t are all equivalent
303309
jint* cppTokens = env -> GetIntArrayElements(jTokens, nullptr);
304310

305311
// C++ allows accessing illegal array indices and returns garbage values, doesn't throw IndexOutOfBoundsException like Java/Kotlin
306312
// Manually ensure that indices stay within dimensions n_tokens x n_vocab of the logit matrix
307313
jsize n_tokens = env -> GetArrayLength(jTokens);
308-
int32_t n_vocab = llama_n_vocab(model);
314+
int32_t n_vocab = llama_vocab_n_tokens(vocab);
309315

310316
// Store tokens to be processed in batch data structure
311317
// llama.cpp example cited below stores multiple tokens from tokenization of the prompt in the first run, single last sampled token in subsequent runs

0 commit comments

Comments (0)