Skip to content

Commit 0073157

Browse files
committed
kv-cache : update warning logs when no space for the batch is available
ggml-ci
1 parent 86c526a commit 0073157

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

src/llama-context.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -948,8 +948,6 @@ int llama_context::decode(llama_batch & inp_batch) {
 
         // find KV slot
         if (!kv_self->find_slot(ubatch)) {
-            LLAMA_LOG_WARN("%s: failed to find KV cache slot for ubatch of size %d\n", __func__, ubatch.n_tokens);
-
            return 1;
        }
 
@@ -2640,9 +2638,17 @@ int32_t llama_decode(
         llama_batch batch) {
     int ret = ctx->decode(batch);
 
+    // defrag and try again
+    // TODO: distinguish return code when we are sure that even after defrag there is no space available
     if (ret == 1) {
         llama_kv_self_defrag(ctx);
         ret = ctx->decode(batch);
+
+        if (ret == 1) {
+            LLAMA_LOG_WARN("%s: failed to find KV cache slot for batch of size %d\n", __func__, batch.n_tokens);
+
+            return ret;
+        }
     }
 
     if (ret != 0) {

0 commit comments

Comments (0)