We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 23e1e54, commit 0b73da5 (Copy full SHA for 0b73da5)
include/llama.h
@@ -692,13 +692,14 @@ extern "C" {
692
// This will be applied:
693
// - lazily on next llama_decode()
694
// - explicitly with llama_kv_self_update()
695
+ // TODO: deprecate and always update the cache lazily [TAG: API_KV_NO_DEFRAG]
696
LLAMA_API void llama_kv_self_defrag(struct llama_context * ctx);
697
698
// Check if the context supports KV cache shifting
699
LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx);
700
701
// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
- // TODO: deprecate and always update the cache lazily
702
703
LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
704
705
//
0 commit comments