Skip to content

Commit 87e397d

Browse files
slaren and ggerganov authored
ggml : fix quant dot product with odd number of blocks (#8549)
* ggml : fix iq4_nl dot product with odd number of blocks

* ggml : fix odd blocks for ARM_NEON (#8556)

  * ggml : fix iq4_nl dot product with odd number of blocks
  * ggml : fix q4_1
  * ggml : fix q5_0
  * ggml : fix q5_1
  * ggml : fix iq4_nl metal (ggml-ci)
  * ggml : fix q4_0
  * ggml : fix q8_0 (ggml-ci)
  * ggml : remove special Q4_0 code for first 2 blocks
  * ggml : fix sumf redefinition

  ---------

  Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
1 parent 57b1d4f commit 87e397d

File tree

4 files changed

+362
-501
lines changed

4 files changed

+362
-501
lines changed

ggml/src/ggml-metal.m

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1786,10 +1786,6 @@ static enum ggml_status ggml_metal_graph_compute(
17861786
}
17871787
};
17881788

1789-
if (ggml_is_quantized(src0t)) {
1790-
GGML_ASSERT(ne00 >= nth0*nth1);
1791-
}
1792-
17931789
[encoder setComputePipelineState:pipeline];
17941790
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
17951791
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];

ggml/src/ggml-metal.metal

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4757,7 +4757,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
47574757
device const float4 * y4 = (device const float4 *)yb;
47584758
yl[0] = y4[0]; yl[1] = y4[4]; yl[2] = y4[1]; yl[3] = y4[5];
47594759

4760-
for (int row = 0; row < 2; ++row) {
4760+
for (int row = 0; row < 2 && first_row + row < ne01; ++row) {
47614761

47624762
device const block_iq4_nl & xb = x[row*nb + ib];
47634763
device const uint16_t * q4 = (device const uint16_t *)(xb.qs + 8*it);
@@ -4789,7 +4789,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
47894789
yb += 16 * QK4_NL;
47904790
}
47914791

4792-
for (int row = 0; row < 2; ++row) {
4792+
for (int row = 0; row < 2 && first_row + row < ne01; ++row) {
47934793
all_sum = simd_sum(sumf[row]);
47944794
if (tiisg == 0) {
47954795
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;

0 commit comments

Comments
 (0)