feat: Add a templated helper to the hybrid cache to retrieve a child

gabe-l-hart · gabe-l-hart · commit f857dc04d609 · 2025-05-16T15:25:37.000-06:00
This will be the public interface used by functions that need to access one
specific type of child. It's a bit brittle since the rest of the hybrid
class intentionally avoids expecting there to be exactly one unified child
and one recurrent child, but the idea is that this should only be used from
a context where that's known to be true.

Branch: GraniteFour

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
diff --git a/src/llama-kv-cache.h b/src/llama-kv-cache.h
@@ -418,6 +418,21 @@ class llama_kv_cache_hybrid : public llama_kv_cache {
         const llama_hparams            & hparams,
               std::vector<child_cache>   children);
 
+    // Typed lookup of a child cache: scans every child and returns the one
+    // whose dynamic type is child_t, or nullptr when no child matches.
+    // NOTE: Asserts if more than one child is of the requested type.
+    template<typename child_t>
+    const child_t * get_child_cache() const {
+        const child_t * child = nullptr;
+        for (const auto & entry : m_children) {
+            const auto * typed = dynamic_cast<const child_t *>(entry.get());
+            if (!typed) { continue; } // not the requested type
+            GGML_ASSERT(!child); // a previous match means the type is not unique
+            child = typed;
+        }
+        return child;
+    }
+
     //
     // llama_memory_i
     //