File tree 1 file changed +8
-2
lines changed
1 file changed +8
-2
lines changed
Original file line number | Diff line number | Diff line change
@@ -948,8 +948,6 @@ int llama_context::decode(llama_batch & inp_batch) {
948
948
949
949
// find KV slot
950
950
if (!kv_self->find_slot (ubatch)) {
951
- LLAMA_LOG_WARN("%s: failed to find KV cache slot for ubatch of size %d\n", __func__, ubatch.n_tokens);
952
-
953
951
return 1 ;
954
952
}
955
953
@@ -2640,9 +2638,17 @@ int32_t llama_decode(
2640
2638
llama_batch batch) {
2641
2639
int ret = ctx->decode (batch);
2642
2640
2641
+ // defrag and try again
2642
+ // TODO: distinguish return code when we are sure that even after defrag there is no space available
2643
2643
if (ret == 1 ) {
2644
2644
llama_kv_self_defrag (ctx);
2645
2645
ret = ctx->decode (batch);
2646
+
2647
+ if (ret == 1 ) {
2648
+ LLAMA_LOG_WARN("%s: failed to find KV cache slot for batch of size %d\n", __func__, batch.n_tokens);
2649
+
2650
+ return ret;
2651
+ }
2646
2652
}
2647
2653
2648
2654
if (ret != 0 ) {
You can’t perform that action at this time.
0 commit comments