Commit 5d99ae4

correct token pos in llama_batch_allocr
1 parent: 9dd7e77

2 files changed: +2 -2 lines changed


examples/infill/infill.cpp

Lines changed: 1 addition & 1 deletion
@@ -376,7 +376,7 @@ int main(int argc, char ** argv) {
     n_past, n_left, n_ctx, params.n_keep, n_discard);

 llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1            , params.n_keep + n_discard + 1);
-llama_kv_cache_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past + 1, -n_discard);
+llama_kv_cache_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);

 n_past -= n_discard;

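For context on the infill.cpp change: after discarding a block of tokens, the remaining KV cache entries are shifted left so positions stay contiguous. Below is a minimal sketch of that range arithmetic, assuming cache positions run from 0 to n_past - 1 and that the fourth argument of llama_kv_cache_seq_add is an exclusive end; the concrete values (n_keep, n_past, n_discard) are hypothetical and only illustrate why the shifted range should end at n_past rather than n_past + 1.

// Minimal sketch of the range arithmetic above (illustrative only; values are hypothetical).
#include <cstdio>

int main() {
    const int n_keep    = 4;   // hypothetical: prefix tokens that are always kept
    const int n_discard = 8;   // hypothetical: tokens dropped after the keep region
    const int n_past    = 32;  // hypothetical: cache currently holds positions 0 .. n_past - 1

    // Removed range (end exclusive), mirroring the llama_kv_cache_seq_rm call above.
    std::printf("remove [%d, %d)\n", n_keep + 1, n_keep + n_discard + 1);

    // The remaining tail is shifted left by n_discard, mirroring llama_kv_cache_seq_add.
    // With 0-based positions the last occupied slot is n_past - 1, so the exclusive end
    // of the shifted range is n_past, not n_past + 1; that is what this commit fixes.
    std::printf("shift  [%d, %d) by %d\n", n_keep + 1 + n_discard, n_past, -n_discard);
    return 0;
}
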
src/llama.cpp

Lines changed: 1 addition & 1 deletion
@@ -21141,7 +21141,7 @@ struct llama_batch_allocr {
     batch = in_batch;
     if (!batch.pos) {
         // determine the last position in KV cache
-        llama_pos last_pos = 0;
+        llama_pos last_pos = -1;
         for (const auto & cell : ctx->kv_self.cells) {
             if (cell.has_seq_id(batch_default_seq_id)) {
                 last_pos = std::max(last_pos, cell.pos);

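The llama.cpp change matters when the KV cache is empty. Assuming the rest of llama_batch_allocr (not shown in this hunk) assigns implicit batch positions starting at last_pos + 1, initializing the scan with -1 makes the first token land at position 0, while the old initial value of 0 would have started it at position 1. A self-contained sketch under that assumption:

// Standalone sketch (assumptions labelled; not the actual llama.cpp code beyond this hunk).
#include <algorithm>
#include <cstdio>
#include <vector>

struct cell_t { int pos; bool in_default_seq; }; // hypothetical stand-in for a KV cache cell

int main() {
    std::vector<cell_t> cells; // empty KV cache: the case the fix targets

    int last_pos = -1; // was 0 before this commit
    for (const auto & cell : cells) {
        if (cell.in_default_seq) {
            last_pos = std::max(last_pos, cell.pos);
        }
    }

    // Assumption: implicit positions for a new batch continue from last_pos + 1.
    // With an empty cache, last_pos = -1 yields positions 0, 1, 2, ...;
    // the old initial value of 0 would have yielded 1, 2, 3, ...
    const int n_tokens = 4; // hypothetical batch size
    for (int i = 0; i < n_tokens; ++i) {
        std::printf("token %d -> pos %d\n", i, last_pos + 1 + i);
    }
    return 0;
}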