kv-cache : fix the logic for iSWA pos_max

ggerganov · ggerganov · commit 7602b9678210 · 2025-05-17T16:55:41.000+03:00
diff --git a/include/llama.h b/include/llama.h
@@ -733,7 +733,7 @@ extern "C" {
     // Returns the largest position present in the KV cache for the specified sequence
     LLAMA_API llama_pos llama_kv_self_seq_pos_max(
             struct llama_context * ctx,
-                     llama_seq_id   seq_id);
+                    llama_seq_id   seq_id);
 
     // Defragment the KV cache
     // This will be applied:
diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp
@@ -1689,7 +1689,8 @@ void llama_kv_cache_unified_iswa::seq_div(llama_seq_id seq_id, llama_pos p0, lla
 }
 
 llama_pos llama_kv_cache_unified_iswa::seq_pos_max(llama_seq_id seq_id) const {
-    return kv_base->seq_pos_max(seq_id);
+    // for a position to be considered present in the iSWA cache, it has to be present in both the base and the SWA caches
+    return std::min(kv_base->seq_pos_max(seq_id), kv_swa->seq_pos_max(seq_id));
 }
 
 void llama_kv_cache_unified_iswa::restore() {

Original file line number	Diff line number	Diff line change
`@@ -1689,7 +1689,8 @@ void llama_kv_cache_unified_iswa::seq_div(llama_seq_id seq_id, llama_pos p0, lla`
`1689`	`1689`	`}`
`1690`	`1690`
`1691`	`1691`	`llama_pos llama_kv_cache_unified_iswa::seq_pos_max(llama_seq_id seq_id) const {`
`1692`		`- return kv_base->seq_pos_max(seq_id);`
	`1692`	`+ // for a position to be considered present in the iSWA cache, it has to be present in both the base and the SWA caches`
	`1693`	`+ return std::min(kv_base->seq_pos_max(seq_id), kv_swa->seq_pos_max(seq_id));`
`1693`	`1694`	`}`
`1694`	`1695`
`1695`	`1696`	`void llama_kv_cache_unified_iswa::restore() {`