File tree Expand file tree Collapse file tree 4 files changed +362
-501
lines changed Expand file tree Collapse file tree 4 files changed +362
-501
lines changed Original file line number Diff line number Diff line change @@ -1786,10 +1786,6 @@ static enum ggml_status ggml_metal_graph_compute(
1786
1786
}
1787
1787
};
1788
1788
1789
- if (ggml_is_quantized (src0t)) {
1790
- GGML_ASSERT (ne00 >= nth0*nth1);
1791
- }
1792
-
1793
1789
[encoder setComputePipelineState: pipeline];
1794
1790
[encoder setBuffer: id_src0 offset: offs_src0 atIndex: 0 ];
1795
1791
[encoder setBuffer: id_src1 offset: offs_src1 atIndex: 1 ];
Original file line number Diff line number Diff line change @@ -4757,7 +4757,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
4757
4757
device const float4 * y4 = (device const float4 *)yb;
4758
4758
yl[0 ] = y4[0 ]; yl[1 ] = y4[4 ]; yl[2 ] = y4[1 ]; yl[3 ] = y4[5 ];
4759
4759
4760
- for (int row = 0 ; row < 2 ; ++row) {
4760
+ for (int row = 0 ; row < 2 && first_row + row < ne01 ; ++row) {
4761
4761
4762
4762
device const block_iq4_nl & xb = x[row*nb + ib];
4763
4763
device const uint16_t * q4 = (device const uint16_t *)(xb.qs + 8 *it);
@@ -4789,7 +4789,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
4789
4789
yb += 16 * QK4_NL;
4790
4790
}
4791
4791
4792
- for (int row = 0 ; row < 2 ; ++row) {
4792
+ for (int row = 0 ; row < 2 && first_row + row < ne01 ; ++row) {
4793
4793
all_sum = simd_sum (sumf[row]);
4794
4794
if (tiisg == 0 ) {
4795
4795
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;
You can’t perform that action at this time.
0 commit comments