Skip to content

Commit cf10001

Browse files
committed
context : fix graph reserve for multiple sequences
ggml-ci
1 parent 07fb71a commit cf10001

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

src/llama-context.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1207,6 +1207,13 @@ ggml_cgraph * llama_context::graph_init() {
1207 1207
ggml_cgraph * llama_context::graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs) {
12081208
LLAMA_LOG_DEBUG("%s: reserving a graph for ubatch with n_tokens = %4u, n_seqs = %2u, n_outputs = %4u\n", __func__, n_tokens, n_seqs, n_outputs);
12091209

1210+
if (n_tokens % n_seqs != 0) {
1211+
n_tokens = (n_tokens / n_seqs) * n_seqs;
1212+
n_outputs = std::min(n_outputs, n_tokens);
1213+
1214+
LLAMA_LOG_DEBUG("%s: making n_tokens a multiple of n_seqs - n_tokens = %u, n_seqs = %u, n_outputs = %u\n", __func__, n_tokens, n_seqs, n_outputs);
1215+
}
1216+
12101217
// store the n_outputs as it is, and restore it afterwards
12111218
// TODO: not sure if needed, might simplify in the future by removing this
12121219
const auto save_n_outputs = this->n_outputs;

0 commit comments

Comments (0)