Skip to content

Commit ee8fb39

Browse files
committed
ggml : add n_as argument to ggml_mul_mat_id
1 parent 7372b62 commit ee8fb39

File tree

6 files changed

+17
-14
lines changed

6 files changed

+17
-14
lines changed

ggml-cuda.cu

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8244,6 +8244,8 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
82448244

82458245
const struct ggml_tensor * ids = src0;
82468246
const int32_t id = dst->op_params[0];
8247+
const int32_t n_as = dst->op_params[1];
8248+
82478249
const char * ids_dev = (const char *)((const ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device];
82488250

82498251
std::vector<char> ids_host(ggml_nbytes(ids));
@@ -8272,7 +8274,7 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s
82728274

82738275
const int32_t row_id = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]);
82748276

8275-
GGML_ASSERT(row_id >= 0 && row_id < ids->ne[0]);
8277+
GGML_ASSERT(row_id >= 0 && row_id < n_as);
82768278

82778279
const struct ggml_tensor * src0_row = dst->src[row_id + 2];
82788280

ggml-metal.m

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1460,7 +1460,7 @@ void ggml_metal_graph_compute(
14601460

14611461
GGML_ASSERT(src0t == GGML_TYPE_I32);
14621462

1463-
const int n_as = ne00;
1463+
const int n_as = ((int32_t *) dst->op_params)[1];
14641464

14651465
// TODO: make this more general
14661466
GGML_ASSERT(n_as <= 8);

ggml.c

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4076,12 +4076,11 @@ struct ggml_tensor * ggml_mul_mat(
40764076
struct ggml_tensor * ggml_mul_mat_id(
40774077
struct ggml_context * ctx,
40784078
struct ggml_tensor * as[],
4079+
int n_as,
40794080
struct ggml_tensor * ids,
40804081
int id,
40814082
struct ggml_tensor * b) {
40824083

4083-
int64_t n_as = ids->ne[0];
4084-
40854084
GGML_ASSERT(ids->type == GGML_TYPE_I32);
40864085
GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1);
40874086
GGML_ASSERT(ids->ne[1] == b->ne[1]);
@@ -4099,15 +4098,15 @@ struct ggml_tensor * ggml_mul_mat_id(
40994098
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(as[0]->n_dims, b->n_dims), ne);
41004099

41014100
ggml_set_op_params_i32(result, 0, id);
4101+
ggml_set_op_params_i32(result, 1, n_as);
41024102

41034103
result->op = GGML_OP_MUL_MAT_ID;
41044104
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
41054105
result->src[0] = ids;
41064106
result->src[1] = b;
41074107

41084108
// TODO: n_as is the selected experts, but it should be the total number of experts
4109-
//for (int64_t i = 0; i < n_as; i++) {
4110-
for (int64_t i = 0; i < 8; i++) {
4109+
for (int i = 0; i < n_as; i++) {
41114110
struct ggml_tensor * a = as[i];
41124111
GGML_ASSERT(ggml_are_same_shape(as[0], a));
41134112
GGML_ASSERT(ggml_can_mul_mat(a, b));
@@ -9757,14 +9756,13 @@ static void ggml_compute_forward_mul_mat_id(
97579756
}
97589757

97599758
const struct ggml_tensor * ids = src0;
9760-
const int id = ggml_get_op_params_i32(dst, 0);
9759+
const int id = ggml_get_op_params_i32(dst, 0);
9760+
const int n_as = ggml_get_op_params_i32(dst, 1);
97619761

97629762
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
97639763
const int32_t row_id = *(const int32_t *) ((const char *) ids->data + i01*ids->nb[1] + id*ids->nb[0]);
97649764

9765-
// TODO: this assert seems wrong?
9766-
//printf("row_id = %d, ids->ne[0] = %d, id = %d\n", row_id, ids->ne[0], id);
9767-
//GGML_ASSERT(row_id >= 0 && row_id < ids->ne[0]);
9765+
GGML_ASSERT(row_id >= 0 && row_id < n_as);
97689766

97699767
const struct ggml_tensor * src0_row = dst->src[row_id + 2];
97709768
ggml_compute_forward_mul_mat(params, src0_row, src1, dst, i01, 1);

ggml.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1052,6 +1052,7 @@ extern "C" {
10521052
GGML_API struct ggml_tensor * ggml_mul_mat_id(
10531053
struct ggml_context * ctx,
10541054
struct ggml_tensor * as[],
1055+
int n_as,
10551056
struct ggml_tensor * ids,
10561057
int id,
10571058
struct ggml_tensor * b);

llama.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4270,11 +4270,11 @@ struct llm_build_context {
42704270
ggml_tensor ** ffn_down_exp = (ggml_tensor **) model.layers[il].ffn_down_exp;
42714271

42724272
cur_expert = ggml_mul(ctx0,
4273-
ggml_mul_mat_id(ctx0, ffn_up_exp, selected_experts, i, cur),
4273+
ggml_mul_mat_id(ctx0, ffn_up_exp, n_experts, selected_experts, i, cur),
42744274
ggml_silu(ctx0,
4275-
ggml_mul_mat_id(ctx0, ffn_gate_exp, selected_experts, i, cur))); // [n_tokens, n_embd]
4275+
ggml_mul_mat_id(ctx0, ffn_gate_exp, n_experts, selected_experts, i, cur))); // [n_tokens, n_embd]
42764276

4277-
cur_expert = ggml_mul_mat_id(ctx0, ffn_down_exp, selected_experts, i, cur_expert); // [n_tokens, n_embd]
4277+
cur_expert = ggml_mul_mat_id(ctx0, ffn_down_exp, n_experts, selected_experts, i, cur_expert); // [n_tokens, n_embd]
42784278
cur_expert = ggml_mul(ctx0, cur_expert,
42794279
ggml_view_2d(ctx0, weights, 1, n_tokens, weights->nb[1], i*weights->nb[0]));
42804280

tests/test-backend-ops.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -343,6 +343,8 @@ struct test_case {
343343
ud->ok = false;
344344
}
345345
return true;
346+
347+
GGML_UNUSED(index);
346348
};
347349

348350
ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud);
@@ -803,7 +805,7 @@ struct test_mul_mat_id : public test_case {
803805
}
804806
ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
805807
ggml_tensor * b = ggml_new_tensor_2d(ctx, type_b, k, n);
806-
ggml_tensor * out = ggml_mul_mat_id(ctx, mats.data(), ids, id, b);
808+
ggml_tensor * out = ggml_mul_mat_id(ctx, mats.data(), n_mats, ids, id, b);
807809
return out;
808810
}
809811

0 commit comments

Comments (0)