 *----------------------------------------------------------------------------*/

 #include "../src/llama-arch.h"
+#include "../src/llama-batch.h"
 #include "../src/llama-hparams.h"
 #include "../src/llama-impl.h"
 #include "../src/llama-kv-cache.h"
 #include "../src/llama-model.h"

+#include "common.h"
 #include "llama.h"

 #include <algorithm>
@@ -103,6 +105,58 @@ static void test_llama_kv_cache_unified_constructor() {
     );
 }

+/* Test that the unified cache can operate with a single seq */
+static void test_llama_kv_cache_unified_single_seq() {
+    auto model = _make_model();
+    llama_kv_cache_unified cache(
+        /* model   */ *model,
+        /* type_k  */ GGML_TYPE_F16,
+        /* type_v  */ GGML_TYPE_F16,
+        /* v_trans */ false,
+        /* offload */ false,
+        /* kv_size */ 10,
+        /* padding */ 10
+    );
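+    // A freshly constructed cache should report zero used cells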
+    GGML_ASSERT(cache.get_used_cells() == 0);
+
+    // Create the micro batch with a single 3-token sequence
+    //
+    // NOTE: A bunch of these asserts were just me figuring out how the batches
+    // relate to each other, but they're left for future readers to help in the
+    // same understanding process.
+    llama_seq_id seq_id = 42;
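+    // llama_batch_init(n_tokens, embd, n_seq_max): room for 3 tokens, token
+    // ids rather than embeddings (embd = 0), at most 1 seq id per token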
+    llama_batch batch = llama_batch_init(3, 0, 1);
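+    // common_batch_add(batch, token, pos, seq_ids, logits)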
+    common_batch_add(batch, 101, 0, {seq_id}, false);
+    common_batch_add(batch,   1, 1, {seq_id}, false);
+    common_batch_add(batch, 102, 2, {seq_id}, false);
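+    // llama_sbatch(batch, n_embd = 0, simple_split = true, logits_all = false)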
+    llama_sbatch sbatch(batch, 0, true, false);
+    GGML_ASSERT(batch.n_tokens == 3);
+    GGML_ASSERT(sbatch.n_tokens == 3);
+    GGML_ASSERT(!sbatch.seq.empty());
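+    // A simple split places each token in its own single-token "sequence"
+    // within the ubatch, so n_seqs == n_tokens even though every token
+    // carries the same seq_id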
+    llama_ubatch ubatch = sbatch.split_simple(4);
+    printf("ubatch.n_seqs=%u\n", ubatch.n_seqs);
+    GGML_ASSERT(ubatch.n_seqs == 3);
+    GGML_ASSERT(ubatch.n_seq_tokens == 1);
+    GGML_ASSERT(ubatch.n_tokens == 3);
+    GGML_ASSERT(ubatch.seq_id[0][0] == seq_id);
+    GGML_ASSERT(ubatch.seq_id[1][0] == seq_id);
+    GGML_ASSERT(ubatch.seq_id[2][0] == seq_id);
+
+    // Find a slot for a new sequence
+    GGML_ASSERT(cache.find_slot(ubatch));
+    printf("cache.head=%u\n", cache.head);
+    GGML_ASSERT(cache.head == 0); // Ready to start filling at the beginning
+    GGML_ASSERT(cache.used == 3);
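+    // The three tokens should land in three consecutive cells, each tagged
+    // with the single seq_id from the batch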
+    for (int i = 0; i < 3; ++i) {
+        GGML_ASSERT(cache.cells[i].seq_id.size() == 1);
+        GGML_ASSERT(*cache.cells[i].seq_id.begin() == seq_id);
+        GGML_ASSERT(cache.cells[i].pos == i);
+    }
+
+    // Clean up
+    llama_batch_free(batch);
+}
+
 /*- Recurrent Cache ----------------------------------------------------------*/

 /* Test that the recurrent cache can be constructed and destructed safely */