/*- Helpers ------------------------------------------------------------------*/

-static std::shared_ptr<llama_model> _make_model() {
+static std::shared_ptr<llama_model> _make_model(
+        llm_arch arch = LLM_ARCH_LLAMA,
+        uint32_t n_layer = 4,
+        uint32_t n_embd_head_k = 4,
+        uint32_t n_embd_head_v = 4,
+        uint32_t n_head = 8,
+        uint32_t n_head_kv = 2) {
+
    llama_model_params params;
    params.tensor_buft_overrides = nullptr;
    std::shared_ptr<llama_model> model(new llama_model(params));
    model->hparams = llama_hparams();
-    model->arch = LLM_ARCH_LLAMA;
+    model->arch = arch;
+
+    model->hparams.n_layer = n_layer;
+    model->hparams.n_embd_head_k = n_embd_head_k;
+    model->hparams.n_embd_head_v = n_embd_head_v;
+
+    auto & recurrent_layer_arr = model->hparams.recurrent_layer_arr;
+    std::fill(
+        recurrent_layer_arr.begin(),
+        recurrent_layer_arr.end(),
+        llm_arch_is_recurrent(arch));
+
+    // If set to 0, assume the test will fill out the array elementwise (hybrid)
+    if (n_head > 0) {
+        auto & n_head_arr = model->hparams.n_head_arr;
+        std::fill(n_head_arr.begin(), n_head_arr.end(), n_head);
+    }
+    if (n_head_kv > 0) {
+        auto & n_head_kv_arr = model->hparams.n_head_kv_arr;
+        std::fill(n_head_kv_arr.begin(), n_head_kv_arr.end(), n_head_kv);
+    }
+
    return model;
}
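The reworked helper fills each per-layer hparams array with one uniform value, and passing 0 for `n_head` or `n_head_kv` signals that the test will populate the array elementwise instead (the hybrid case later in this diff). A minimal, self-contained sketch of that fill-then-override pattern, using a plain `std::array` in place of the `llama_hparams` arrays, purely for illustration and not part of the diff:

```cpp
// Illustration only: a stand-in for llama_hparams' fixed-size per-layer arrays.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
    std::array<uint32_t, 4> n_head_arr{};   // hypothetical 4-layer model

    // Uniform case (n_head > 0 in the helper): every layer gets the same head count.
    std::fill(n_head_arr.begin(), n_head_arr.end(), uint32_t(8));

    // Hybrid case (n_head == 0 in the helper): the test overrides individual layers itself.
    n_head_arr[1] = 32;
    n_head_arr[3] = 32;

    for (std::size_t il = 0; il < n_head_arr.size(); ++il) {
        std::printf("layer %zu: n_head = %u\n", il, n_head_arr[il]);
    }
    return 0;
}
```

This is the same convention that lets test_llama_kv_cache_hybrid_constructor below pass 0 for both head counts and then assign per-layer values by hand.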
@@ -57,7 +85,7 @@ static void test_llama_kv_cache_unified_constructor() {
/* Test that the recurrent cache can be constructed and destructed safely */
static void test_llama_kv_cache_recurrent_constructor() {
    LOG_SCOPE();
-    auto model = _make_model();
+    auto model = _make_model(LLM_ARCH_MAMBA);
    llama_kv_cache_recurrent cache(
        /* model */ *model,
        /* type_k */ GGML_TYPE_F32,
@@ -72,15 +100,24 @@ static void test_llama_kv_cache_recurrent_constructor() {
/* Test that the hybrid cache can be constructed and destructed safely */
static void test_llama_kv_cache_hybrid_constructor() {
    LOG_SCOPE();
-    auto model = _make_model();
-    model->hparams.n_layer = 4;
-    model->hparams.n_embd_head_k = 4;
-    model->hparams.n_embd_head_v = 4;
+    auto model = _make_model(
+        /* arch =*/ LLM_ARCH_LLAMA,
+        /* n_layer =*/ 4,
+        /* n_embd_head_k =*/ 4,
+        /* n_embd_head_v =*/ 4,
+        /* n_head =*/ 0,
+        /* n_head_kv =*/ 0
+    );
    auto & recurrent_layer_arr = model->hparams.recurrent_layer_arr;
    recurrent_layer_arr[0] = 1;
    recurrent_layer_arr[1] = 0;
    recurrent_layer_arr[2] = 1;
    recurrent_layer_arr[3] = 0;
+    auto & n_head_arr = model->hparams.n_head_arr;
+    n_head_arr[0] = 16;
+    n_head_arr[1] = 32;
+    n_head_arr[2] = 16;
+    n_head_arr[3] = 32;
    auto & n_head_kv_arr = model->hparams.n_head_kv_arr;
    n_head_kv_arr[0] = 16;
    n_head_kv_arr[1] = 8;