Skip to content

Commit 7602b96

Browse files
committed
kv-cache : fix the logic for iSWA pos_max
1 parent 0073157 commit 7602b96

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

include/llama.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,7 @@ extern "C" {
     // Returns the largest position present in the KV cache for the specified sequence
     LLAMA_API llama_pos llama_kv_self_seq_pos_max(
             struct llama_context * ctx,
-            llama_seq_id seq_id);
+            llama_seq_id seq_id);
 
     // Defragment the KV cache
     // This will be applied:

src/llama-kv-cache.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1689,7 +1689,8 @@ void llama_kv_cache_unified_iswa::seq_div(llama_seq_id seq_id, llama_pos p0, lla
 }
 
 llama_pos llama_kv_cache_unified_iswa::seq_pos_max(llama_seq_id seq_id) const {
-    return kv_base->seq_pos_max(seq_id);
+    // for a position to be considered present in the iSWA cache, it has to be present in both the base and the SWA caches
+    return std::min(kv_base->seq_pos_max(seq_id), kv_swa->seq_pos_max(seq_id));
 }
 
 void llama_kv_cache_unified_iswa::restore() {

0 commit comments

Comments
 (0)