@@ -290,7 +290,7 @@ llama_pos llama_kv_cache_unified::seq_pos_min(llama_seq_id seq_id) const {

     for (uint32_t i = 0; i < cells.size(); ++i) {
         if (cells.seq_has(i, seq_id)) {
-            result = std::min(result, cells.get_pos(i));
+            result = std::min(result, cells.pos_get(i));
         }
     }

@@ -306,7 +306,7 @@ llama_pos llama_kv_cache_unified::seq_pos_max(llama_seq_id seq_id) const {

     for (uint32_t i = 0; i < cells.size(); ++i) {
         if (cells.seq_has(i, seq_id)) {
-            result = std::max(result, cells.get_pos(i));
+            result = std::max(result, cells.pos_get(i));
         }
     }

@@ -611,7 +611,7 @@ void llama_kv_cache_unified::prune_swa(llama_seq_id seq_id, llama_pos pmin, llam
             continue;
         }

-        const llama_pos p0 = cells.get_pos(i);
+        const llama_pos p0 = cells.pos_get(i);

         if (p0 <= pmin && !is_masked_swa(p0, pmin)) {
             n_attended++;
@@ -664,7 +664,7 @@ void llama_kv_cache_unified::set_input_kq_mask(ggml_tensor * dst, const llama_ub
                 if (cells.is_empty(i)) {
                     masked = true;
                 } else {
-                    const llama_pos p0 = cells.get_pos(i);
+                    const llama_pos p0 = cells.pos_get(i);

                     // mask the token if not the same sequence
                     masked = masked || (!cells.seq_has(i, seq_id));
@@ -724,7 +724,7 @@ void llama_kv_cache_unified::set_input_pos_bucket(ggml_tensor * dst, const llama
         for (int j = 0; j < n_tokens; ++j) {
             for (int i = 0; i < n_kv; ++i) {
                 // the position when the cells is empty is irrelevant - it will be masked out later in the attention
-                const llama_pos p0 = cells.is_empty(i) ? -1 : cells.get_pos(i);
+                const llama_pos p0 = cells.is_empty(i) ? -1 : cells.pos_get(i);

                 data[h*(n_kv*n_tokens) + j*n_kv + i] = llama_relative_position_bucket(p0, ubatch->pos[j], hparams.n_rel_attn_bkts, false);
             }
@@ -1250,7 +1250,7 @@ void llama_kv_cache_unified::state_write_meta(llama_io_write_i & io, const std::
                 }
             }

-            const llama_pos pos = cells.get_pos(i);
+            const llama_pos pos = cells.pos_get(i);
             const uint32_t n_seq_id = seq_ids.size();

             io.write(&pos, sizeof(pos));
@@ -1394,8 +1394,8 @@ bool llama_kv_cache_unified::state_read_meta(llama_io_read_i & io, uint32_t cell
         // DEBUG CHECK: kv.head should be our first cell, kv.head + cell_count - 1 should be our last cell (verify seq_id and pos values)
         // Assume that this is one contiguous block of cells
         GGML_ASSERT(head + cell_count <= cells.size());
-        GGML_ASSERT(cells.get_pos(head) == batch.pos[0]);
-        GGML_ASSERT(cells.get_pos(head + cell_count - 1) == batch.pos[cell_count - 1]);
+        GGML_ASSERT(cells.pos_get(head) == batch.pos[0]);
+        GGML_ASSERT(cells.pos_get(head + cell_count - 1) == batch.pos[cell_count - 1]);
         GGML_ASSERT(cells.seq_has(head, dest_seq_id));
         GGML_ASSERT(cells.seq_has(head + cell_count - 1, dest_seq_id));
     } else {
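
Every hunk above is the same mechanical rename of the cell position accessor, `cells.get_pos(i)` to `cells.pos_get(i)`; the surrounding call sites are unchanged. The snippet below is only a minimal, self-contained sketch of the renamed accessor in use, not the upstream implementation: `kv_cells_sketch` and `seq_pos_min_sketch` are hypothetical stand-ins, modeling only the members this diff actually exercises (`size()`, `is_empty()`, `seq_has()`, `pos_get()`) with a single sequence id per cell for simplicity.

```cpp
#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

// Hypothetical stand-ins for the llama.cpp types touched by this diff.
using llama_pos    = int32_t;
using llama_seq_id = int32_t;

// Simplified cells container: one position and one sequence id per cell,
// with the position accessor spelled pos_get() as in the new code.
struct kv_cells_sketch {
    std::vector<llama_pos>    pos; // -1 marks an empty cell
    std::vector<llama_seq_id> seq;

    uint32_t  size()                               const { return (uint32_t) pos.size(); }
    bool      is_empty(uint32_t i)                 const { return pos[i] == -1; }
    bool      seq_has (uint32_t i, llama_seq_id s) const { return !is_empty(i) && seq[i] == s; }
    llama_pos pos_get (uint32_t i)                 const { return pos[i]; } // renamed from get_pos()
};

// Mirrors the seq_pos_min() loop from the first hunk, using the renamed accessor.
llama_pos seq_pos_min_sketch(const kv_cells_sketch & cells, llama_seq_id seq_id) {
    llama_pos result = std::numeric_limits<llama_pos>::max();

    for (uint32_t i = 0; i < cells.size(); ++i) {
        if (cells.seq_has(i, seq_id)) {
            result = std::min(result, cells.pos_get(i));
        }
    }

    // no cell belongs to seq_id -> report -1, as the real function does
    return result == std::numeric_limits<llama_pos>::max() ? -1 : result;
}
```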