1 file changed: +5 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -18211,13 +18211,13 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
18211
18211
static void llama_kv_cache_update_internal(struct llama_context & lctx) {
18212
18212
bool need_reserve = false;
18213
18213
18214
- // apply K-shift if needed
18215
- if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
18214
+ if (lctx.kv_self.has_shift) {
18216
18215
if (!llama_kv_cache_can_shift(&lctx)) {
18217
- GGML_ABORT("Deepseek2 does not support K-shift");
18216
+ GGML_ABORT("The current context does not support K-shift");
18218
18217
}
18219
18218
18220
- {
18219
+ // apply K-shift if needed
18220
+ if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {
18221
18221
ggml_backend_sched_reset(lctx.sched.get());
18222
18222
18223
18223
ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
@@ -20463,7 +20463,7 @@ void llama_kv_cache_update(struct llama_context * ctx) {
20463
20463
}
20464
20464
20465
20465
bool llama_kv_cache_can_shift(struct llama_context * ctx) {
20466
- return ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
20466
+ return !ctx->kv_self.recurrent && ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
20467
20467
}
20468
20468
20469
20469
// deprecated
You can’t perform that action at this time.
0 commit comments