fix: Give ownership of child caches to the hybrid cache

gabe-l-hart · gabe-l-hart · commit 0b3853d2ccdf · 2025-05-13T10:42:23.000-06:00
The parent should fully own the lifecycle of its children, which is managed
by the m_children member holding unique_ptrs. For these to be initialized
correctly, the constructor now takes the input vector of child_cache by
value instead of by const reference so that ownership of the child pointers
can be transferred to the parent cache. The expectation is that the vector
of child_cache instances will be instantiated in-place with move semantics.

Branch: HybridCache

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp
@@ -2419,23 +2419,23 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce
 // llama_kv_cache_hybrid
 //
 llama_kv_cache_hybrid::llama_kv_cache_hybrid(
-    const llama_hparams & hparams,
-    const std::vector<child_cache> & children) :
+    const llama_hparams            & hparams,
+          std::vector<child_cache>   children) :
     m_hparams(hparams),
     m_layer_cache_map(
         [](const std::vector<child_cache>& caches) -> std::unordered_map<size_t, llama_kv_cache*> {
             std::unordered_map<size_t, llama_kv_cache*> map;
             for (const auto & cache : caches) {
                 for (size_t layer_id : cache.layer_ids) {
-                    map[layer_id] = cache.child;
+                    map[layer_id] = cache.child.get();
                 }
             }
 
             return map;
         }(children)
     ),
     m_children(
-        [](std::vector<child_cache> caches) -> std::set<llama_kv_cache*> {
+        [](std::vector<child_cache>& caches) -> std::set<std::unique_ptr<llama_kv_cache>> {
             // Sort the caches by the lowest layer ID so the order is repeatable
             for (auto & cache : caches) {
                 GGML_ASSERT(cache.layer_ids.size() > 0);
@@ -2444,22 +2444,22 @@ llama_kv_cache_hybrid::llama_kv_cache_hybrid(
             std::sort(caches.begin(), caches.end(), [](const child_cache & a, const child_cache & b) {
                 return a.layer_ids[0] < b.layer_ids[0];
             });
-            std::set<llama_kv_cache*> unique_caches;
-            for (const auto & cache : caches) {
-                unique_caches.insert(cache.child);
+            std::set<std::unique_ptr<llama_kv_cache>> unique_caches;
+            for (auto & cache : caches) {
+                unique_caches.emplace(cache.child.release());
             }
             return unique_caches;
         }(children)
     ),
     m_has_recurrent(
-        [](const std::vector<child_cache>& caches) -> bool {
+        [](const std::set<std::unique_ptr<llama_kv_cache>> & caches) -> bool {
             for (const auto & cache : caches) {
-                if (dynamic_cast<llama_kv_cache_recurrent *>(cache.child)) {
+                if (dynamic_cast<llama_kv_cache_recurrent *>(cache.get())) {
                     return true;
                 }
             }
             return false;
-        }(children)
+        }(m_children)
     )
 {
     // Ensure at least one child
diff --git a/src/llama-kv-cache.h b/src/llama-kv-cache.h
@@ -413,13 +413,16 @@ class llama_kv_cache_hybrid : public llama_kv_cache {
 public:
 
     struct child_cache {
-        llama_kv_cache * child;
-        std::vector<size_t> layer_ids;
+        std::unique_ptr<llama_kv_cache> child;
+        std::vector<size_t>             layer_ids;
+
+        child_cache(std::unique_ptr<llama_kv_cache> child_, std::vector<size_t> layer_ids_)
+            : child(std::move(child_)), layer_ids(std::move(layer_ids_)) {}
     };
 
     llama_kv_cache_hybrid(
         const llama_hparams            & hparams,
-        const std::vector<child_cache> & children);
+              std::vector<child_cache>   children);
 
     //
     // llama_memory_i
@@ -476,7 +479,7 @@ class llama_kv_cache_hybrid : public llama_kv_cache {
 
     const llama_hparams                                & m_hparams;
     const std::unordered_map<size_t, llama_kv_cache *>   m_layer_cache_map;
-    const std::set<llama_kv_cache *>                     m_children; // Ordered for state IO
+    const std::set<std::unique_ptr<llama_kv_cache>>      m_children; // Ordered for state IO
     const bool                                           m_has_recurrent;
 };