Skip to content

Commit 07fb71a

Browse files
committed
kv-cache : some comments
ggml-ci
1 parent: 1875575 · commit: 07fb71a

File tree

2 files changed

+2
-1
lines changed

2 files changed

+2
-1
lines changed

src/llama-context.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,7 @@ struct llama_context {
185 185        ggml_cgraph * gf,
186 186        bool batched);
187 187
188     -      // reserve a graph
    188 +      // reserve a graph with a dummy ubatch of the specified size
189 189        ggml_cgraph * graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs);
190 190
191 191    private:
private:

src/llama-kv-cache.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,7 @@ class llama_kv_cache_unified : public llama_kv_cache {
135 135        ggml_tensor * cpy_k(ggml_context * ctx, ggml_tensor * k_cur, int32_t il) const;
136 136        ggml_tensor * cpy_v(ggml_context * ctx, ggml_tensor * v_cur, int32_t il) const;
137 137
    138 +      // find places for the provided ubatches in the cache, returns the head locations
138 139        // return empty vector on failure
139 140        std::vector<uint32_t> prepare(const std::vector<llama_ubatch> & ubatches);
140 141

0 commit comments

Comments (0)