Skip to content

Commit 85a267d

Browse files
CUDA: fix MMQ stream-k for --split-mode row (#8167)
1 parent f675b20 commit 85a267d

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2475,7 +2475,7 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
2475 2475
2476 2476   const dim3 block_nums_mmq(nsm, 1, 1);
2477 2477
2478      - ggml_cuda_pool & pool = ctx.pool();
     2478 + ggml_cuda_pool & pool = ctx.pool(id);
2479 2479   ggml_cuda_pool_alloc<float> tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y);
2480 2480
2481 2481   if (args.ne01 % mmq_y == 0) {

0 commit comments

Comments
 (0)