Skip to content

Commit 2774b0c

Browse files
slaren authored and ggerganov committed
add google magika inference example (ggml/748)
* add magika inference example
* ggml : fix unaligned accesses in custom ops
* ggml : fix FP32 GELU for values that exceed the FP16 range
* use ggml_pool_1d
* add README
* Update README.md
* pad inputs if the files are too small
* cleanup

ggml-ci
1 parent 5f70671 commit 2774b0c

File tree

1 file changed

+34
-20
lines changed

1 file changed

+34
-20
lines changed

ggml.c

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,9 +1608,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
16081608
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
16091609
uint16_t t;
16101610
for (int i = 0; i < n; ++i) {
1611-
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
1612-
memcpy(&t, &fp16, sizeof(uint16_t));
1613-
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
1611+
if (x[i] <= -10.0f) {
1612+
y[i] = 0.0f;
1613+
} else if (x[i] >= 10.0f) {
1614+
y[i] = x[i];
1615+
} else {
1616+
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
1617+
memcpy(&t, &fp16, sizeof(uint16_t));
1618+
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
1619+
}
16141620
}
16151621
}
16161622
#else
@@ -5780,11 +5786,13 @@ struct ggml_tensor * ggml_pool_1d(
57805786
is_node = true;
57815787
}
57825788

5783-
const int64_t ne[2] = {
5789+
const int64_t ne[4] = {
57845790
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
57855791
a->ne[1],
5792+
a->ne[2],
5793+
a->ne[3],
57865794
};
5787-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
5795+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
57885796

57895797
int32_t params[] = { op, k0, s0, p0 };
57905798
ggml_set_op_params(result, params, sizeof(params));
@@ -15081,9 +15089,10 @@ static void ggml_compute_forward_map_custom1(
1508115089
return;
1508215090
}
1508315091

15084-
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
15092+
struct ggml_map_custom1_op_params p;
15093+
memcpy(&p, dst->op_params, sizeof(p));
1508515094

15086-
p->fun(dst, a, params->ith, params->nth, p->userdata);
15095+
p.fun(dst, a, params->ith, params->nth, p.userdata);
1508715096
}
1508815097

1508915098
// ggml_compute_forward_map_custom2
@@ -15099,9 +15108,10 @@ static void ggml_compute_forward_map_custom2(
1509915108
return;
1510015109
}
1510115110

15102-
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
15111+
struct ggml_map_custom2_op_params p;
15112+
memcpy(&p, dst->op_params, sizeof(p));
1510315113

15104-
p->fun(dst, a, b, params->ith, params->nth, p->userdata);
15114+
p.fun(dst, a, b, params->ith, params->nth, p.userdata);
1510515115
}
1510615116

1510715117
// ggml_compute_forward_map_custom3
@@ -15118,9 +15128,10 @@ static void ggml_compute_forward_map_custom3(
1511815128
return;
1511915129
}
1512015130

15121-
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
15131+
struct ggml_map_custom3_op_params p;
15132+
memcpy(&p, dst->op_params, sizeof(p));
1512215133

15123-
p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
15134+
p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
1512415135
}
1512515136

1512615137
// ggml_compute_forward_cross_entropy_loss
@@ -17386,29 +17397,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
1738617397
} break;
1738717398
case GGML_OP_MAP_CUSTOM1:
1738817399
{
17389-
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
17390-
if (p->n_tasks == GGML_N_TASKS_MAX) {
17400+
struct ggml_map_custom1_op_params p;
17401+
memcpy(&p, node->op_params, sizeof(p));
17402+
if (p.n_tasks == GGML_N_TASKS_MAX) {
1739117403
n_tasks = n_threads;
1739217404
} else {
17393-
n_tasks = MIN(p->n_tasks, n_threads);
17405+
n_tasks = MIN(p.n_tasks, n_threads);
1739417406
}
1739517407
} break;
1739617408
case GGML_OP_MAP_CUSTOM2:
1739717409
{
17398-
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
17399-
if (p->n_tasks == GGML_N_TASKS_MAX) {
17410+
struct ggml_map_custom2_op_params p;
17411+
memcpy(&p, node->op_params, sizeof(p));
17412+
if (p.n_tasks == GGML_N_TASKS_MAX) {
1740017413
n_tasks = n_threads;
1740117414
} else {
17402-
n_tasks = MIN(p->n_tasks, n_threads);
17415+
n_tasks = MIN(p.n_tasks, n_threads);
1740317416
}
1740417417
} break;
1740517418
case GGML_OP_MAP_CUSTOM3:
1740617419
{
17407-
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
17408-
if (p->n_tasks == GGML_N_TASKS_MAX) {
17420+
struct ggml_map_custom3_op_params p;
17421+
memcpy(&p, node->op_params, sizeof(p));
17422+
if (p.n_tasks == GGML_N_TASKS_MAX) {
1740917423
n_tasks = n_threads;
1741017424
} else {
17411-
n_tasks = MIN(p->n_tasks, n_threads);
17425+
n_tasks = MIN(p.n_tasks, n_threads);
1741217426
}
1741317427
} break;
1741417428
case GGML_OP_CROSS_ENTROPY_LOSS:

0 commit comments

Comments
 (0)