Skip to content

Commit 5865b18

Browse files
committed
metal : fix mat-vec Q4_K kernel for QK_K == 64
1 parent a8b9bb4 commit 5865b18

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

ggml-metal.metal

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3018,8 +3018,8 @@ void kernel_mul_mv_q4_K_f32_impl(
3018 3018
constant uint & r2,
3019 3019
constant uint & r3,
3020 3020
uint3 tgpig[[threadgroup_position_in_grid]],
3021-
uint tiisg[[thread_index_in_simdgroup]],
3022-
uint sgitg[[simdgroup_index_in_threadgroup]]) {
3021+
uint tiisg[[thread_index_in_simdgroup]],
3022+
uint sgitg[[simdgroup_index_in_threadgroup]]) {
3023 3023

3024 3024
const int ix = tiisg/4; // 0...7
3025 3025
const int it = tiisg%4; // 0...3
@@ -3028,7 +3028,7 @@ void kernel_mul_mv_q4_K_f32_impl(
3028 3028
const int r0 = tgpig.x;
3029 3029
const int r1 = tgpig.y;
3030 3030
const int im = tgpig.z;
3031-
const int first_row = (r0 * N_SIMDGROUP + sgitg) * N_DST;
3031+
const int first_row = r0 * N_DST;
3032 3032
const int ib_row = first_row * nb;
3033 3033

3034 3034
const uint i12 = im%ne12;
@@ -3094,7 +3094,7 @@ void kernel_mul_mv_q4_K_f32_impl(
3094 3094
for (int row = 0; row < N_DST; ++row) {
3095 3095
all_sum = simd_sum(sumf[row]);
3096 3096
if (tiisg == 0) {
3097-
dst[r1*ne0+ im*ne0*ne1 + first_row + row] = all_sum;
3097+
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;
3098 3098
}
3099 3099
}
3100 3100
}

0 commit comments

Comments (0)