Skip to content

Commit 611aa91

Browse files
authored
metal : optimize MoE for large batches (#13388)
ggml-ci
1 parent 0cf6725 commit 611aa91

File tree

4 files changed

+458
-293
lines changed

4 files changed

+458
-293
lines changed

ggml/src/ggml-metal/ggml-metal-impl.h

Lines changed: 32 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -299,21 +299,42 @@ typedef struct {
299 299
} ggml_metal_kargs_mul_mv_ext;
300 300

301 301
typedef struct {
302-
int32_t nei0;
303-
int32_t nei1;
304-
uint64_t nbi1;
302+
int32_t ne10;
303+
int32_t ne11; // n_expert_used (bcast)
304+
uint64_t nb11;
305+
uint64_t nb12;
306+
int32_t neh11; // n_tokens
307+
uint64_t nbh11;
308+
int32_t ne20; // n_expert_used
309+
uint64_t nb21;
310+
} ggml_metal_kargs_mul_mm_id_map0;
311+
312+
typedef struct {
313+
int32_t ne20; // n_expert_used
314+
int32_t neh0;
315+
int32_t neh1;
316+
uint64_t nbh1;
317+
uint64_t nbh2;
318+
int32_t ne0;
319+
uint64_t nb1;
320+
uint64_t nb2;
321+
} ggml_metal_kargs_mul_mm_id_map1;
322+
323+
typedef struct {
305 324
int32_t ne00;
306 325
int32_t ne02;
307 326
uint64_t nb01;
308 327
uint64_t nb02;
309-
int32_t ne11;
310-
int32_t ne12;
311-
int32_t ne13;
312-
uint64_t nb10;
313-
uint64_t nb11;
314-
uint64_t nb12;
315-
int32_t ne0;
316-
int32_t ne1;
328+
uint64_t nb03;
329+
int32_t neh12;
330+
uint64_t nbh10;
331+
uint64_t nbh11;
332+
uint64_t nbh12;
333+
uint64_t nbh13;
334+
int32_t neh0;
335+
int32_t neh1;
336+
int16_t r2;
337+
int16_t r3;
317 338
} ggml_metal_kargs_mul_mm_id;
318 339

319 340
typedef struct {

0 commit comments

Comments
 (0)