Skip to content

Commit 65cde6d

Browse files
committed
kv-cache : fix recurrent seq_rm()
ggml-ci
1 parent fa47763 commit 65cde6d

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

src/llama-kv-cache.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1485,6 +1485,8 @@ void llama_kv_cache_recurrent::clear() {
14851485
}
14861486

14871487
bool llama_kv_cache_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) {
1488+
uint32_t new_head = size;
1489+
14881490
if (p0 < 0) {
14891491
p0 = 0;
14901492
}
@@ -1518,6 +1520,34 @@ bool llama_kv_cache_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_p
15181520
}
15191521
}
15201522

1523+
for (uint32_t i = 0; i < size; ++i) {
1524+
if (cells[i].pos >= p0 && cells[i].pos < p1) {
1525+
if (seq_id < 0) {
1526+
cells[i].seq_id.clear();
1527+
} else if (cells[i].has_seq_id(seq_id)) {
1528+
cells[i].seq_id.erase(seq_id);
1529+
} else {
1530+
continue;
1531+
}
1532+
if (cells[i].is_empty()) {
1533+
// keep count of the number of used cells
1534+
if (cells[i].pos >= 0) {
1535+
used--;
1536+
}
1537+
cells[i].pos = -1;
1538+
cells[i].src = -1;
1539+
if (new_head == size) {
1540+
new_head = i;
1541+
}
1542+
}
1543+
}
1544+
}
1545+
1546+
// If we freed up a slot that lies before the current head, move head back to it so searching can start there.
1547+
if (new_head != size && new_head < head) {
1548+
head = new_head;
1549+
}
1550+
15211551
return true;
15221552
}
15231553

0 commit comments

Comments
 (0)