Skip to content

Commit 980604d

Browse files
fix results for batch size < 32
1 parent e0d0a0f commit 980604d

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

ggml-cuda.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5126,11 +5126,11 @@ static void ggml_cpy_f32_q8_0_cuda(
5126 5126
if (first_incomplete && last_incomplete) {
5127 5127
GGML_ASSERT(i_blck_0 + ne00 < QK8_0); // otherwise there would be a race condition
5128 5128
GGML_ASSERT(pad == false);
5129-
cpy_f32_q8_0<true, true, false><<<block_nums, block_dims, 0, stream>>>
5129+
cpy_f32_q8_0<true, true, true><<<block_nums, block_dims, 0, stream>>>
5130 5130
(cx, cdst, i_blck_0, ne00, ne01, ne02, nb00, nb01, nb02, nb11, nb12);
5131 5131
} else if (first_incomplete && !last_incomplete) {
5132 5132
GGML_ASSERT(pad == false);
5133-
cpy_f32_q8_0<true, false, false><<<block_nums, block_dims, 0, stream>>>
5133+
cpy_f32_q8_0<true, false, true><<<block_nums, block_dims, 0, stream>>>
5134 5134
(cx, cdst, i_blck_0, ne00, ne01, ne02, nb00, nb01, nb02, nb11, nb12);
5135 5135
} else if (!first_incomplete && last_incomplete && pad) {
5136 5136
cpy_f32_q8_0<false, true, false><<<block_nums, block_dims, 0, stream>>>
@@ -5139,7 +5139,7 @@ static void ggml_cpy_f32_q8_0_cuda(
5139 5139
cpy_f32_q8_0<false, true, true><<<block_nums, block_dims, 0, stream>>>
5140 5140
(cx, cdst, i_blck_0, ne00, ne01, ne02, nb00, nb01, nb02, nb11, nb12);
5141 5141
} else if (!first_incomplete && !last_incomplete && pad) {
5142-
cpy_f32_q8_0<false, false, true><<<block_nums, block_dims, 0, stream>>>
5142+
cpy_f32_q8_0<false, false, false><<<block_nums, block_dims, 0, stream>>>
5143 5143
(cx, cdst, i_blck_0, ne00, ne01, ne02, nb00, nb01, nb02, nb11, nb12);
5144 5144
} else if (!first_incomplete && !last_incomplete && !pad) {
5145 5145
cpy_f32_q8_0<false, false, true><<<block_nums, block_dims, 0, stream>>>

0 commit comments

Comments (0)