@@ -1608,9 +1608,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
1608
1608
// ggml_vec_gelu_f32: fills y[0..n-1] from x[0..n-1], one element at a time.
//   n - number of elements to process
//   y - output array (written)
//   x - input array (read only)
// This span is a diff rendering: lines prefixed '-' are the previous loop body
// (the table lookup ran for every element), lines prefixed '+' are its
// replacement (the lookup runs only for inputs strictly between -10 and 10).
// NOTE(review): presumably ggml_table_gelu_f16 is a precomputed GELU table
// indexed by the raw fp16 bit pattern - confirm against its definition.
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
1609
1609
uint16_t t;
1610
1610
for (int i = 0; i < n; ++i) {
1611
// Previous body: convert x[i] to fp16, copy its bytes into t, use t as the table index.
- ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
1612
- memcpy(&t, &fp16, sizeof(uint16_t));
1613
- y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
1611
// New body: inputs at or below -10 yield 0, inputs at or above 10 pass through unchanged.
// NOTE(review): presumably chosen because GELU(x) is ~0 / ~x outside this range, so the
// fp16 round-trip is skipped for large magnitudes - confirm intent with the author.
+ if (x[i] <= -10.0f) {
1612
+ y[i] = 0.0f;
1613
+ } else if (x[i] >= 10.0f) {
1614
+ y[i] = x[i];
1615
+ } else {
1616
// Remaining inputs (including any value that fails both comparisons) use the table path.
+ ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
1617
// memcpy moves the fp16 value's bytes into a uint16_t so it can serve as an array index.
+ memcpy(&t, &fp16, sizeof(uint16_t));
1618
+ y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
1619
+ }
1614
1620
}
1615
1621
}
1616
1622
#else
@@ -5780,11 +5786,13 @@ struct ggml_tensor * ggml_pool_1d(
5780
5786
is_node = true;
5781
5787
}
5782
5788
5783
- const int64_t ne[2 ] = {
5789
+ const int64_t ne[4 ] = {
5784
5790
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
5785
5791
a->ne[1],
5792
+ a->ne[2],
5793
+ a->ne[3],
5786
5794
};
5787
- struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2 , ne);
5795
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4 , ne);
5788
5796
5789
5797
int32_t params[] = { op, k0, s0, p0 };
5790
5798
ggml_set_op_params(result, params, sizeof(params));
@@ -15081,9 +15089,10 @@ static void ggml_compute_forward_map_custom1(
15081
15089
return;
15082
15090
}
15083
15091
15084
- struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
15092
+ struct ggml_map_custom1_op_params p;
15093
+ memcpy(&p, dst->op_params, sizeof(p));
15085
15094
15086
- p-> fun(dst, a, params->ith, params->nth, p-> userdata);
15095
+ p. fun(dst, a, params->ith, params->nth, p. userdata);
15087
15096
}
15088
15097
15089
15098
// ggml_compute_forward_map_custom2
@@ -15099,9 +15108,10 @@ static void ggml_compute_forward_map_custom2(
15099
15108
return;
15100
15109
}
15101
15110
15102
- struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
15111
+ struct ggml_map_custom2_op_params p;
15112
+ memcpy(&p, dst->op_params, sizeof(p));
15103
15113
15104
- p-> fun(dst, a, b, params->ith, params->nth, p-> userdata);
15114
+ p. fun(dst, a, b, params->ith, params->nth, p. userdata);
15105
15115
}
15106
15116
15107
15117
// ggml_compute_forward_map_custom3
@@ -15118,9 +15128,10 @@ static void ggml_compute_forward_map_custom3(
15118
15128
return;
15119
15129
}
15120
15130
15121
- struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
15131
+ struct ggml_map_custom3_op_params p;
15132
+ memcpy(&p, dst->op_params, sizeof(p));
15122
15133
15123
- p-> fun(dst, a, b, c, params->ith, params->nth, p-> userdata);
15134
+ p. fun(dst, a, b, c, params->ith, params->nth, p. userdata);
15124
15135
}
15125
15136
15126
15137
// ggml_compute_forward_cross_entropy_loss
@@ -17386,29 +17397,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
17386
17397
} break;
17387
17398
case GGML_OP_MAP_CUSTOM1:
17388
17399
{
17389
- struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
17390
- if (p->n_tasks == GGML_N_TASKS_MAX) {
17400
+ struct ggml_map_custom1_op_params p;
17401
+ memcpy(&p, node->op_params, sizeof(p));
17402
+ if (p.n_tasks == GGML_N_TASKS_MAX) {
17391
17403
n_tasks = n_threads;
17392
17404
} else {
17393
- n_tasks = MIN(p-> n_tasks, n_threads);
17405
+ n_tasks = MIN(p. n_tasks, n_threads);
17394
17406
}
17395
17407
} break;
17396
17408
case GGML_OP_MAP_CUSTOM2:
17397
17409
{
17398
- struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
17399
- if (p->n_tasks == GGML_N_TASKS_MAX) {
17410
+ struct ggml_map_custom2_op_params p;
17411
+ memcpy(&p, node->op_params, sizeof(p));
17412
+ if (p.n_tasks == GGML_N_TASKS_MAX) {
17400
17413
n_tasks = n_threads;
17401
17414
} else {
17402
- n_tasks = MIN(p-> n_tasks, n_threads);
17415
+ n_tasks = MIN(p. n_tasks, n_threads);
17403
17416
}
17404
17417
} break;
17405
17418
case GGML_OP_MAP_CUSTOM3:
17406
17419
{
17407
- struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
17408
- if (p->n_tasks == GGML_N_TASKS_MAX) {
17420
+ struct ggml_map_custom3_op_params p;
17421
+ memcpy(&p, node->op_params, sizeof(p));
17422
+ if (p.n_tasks == GGML_N_TASKS_MAX) {
17409
17423
n_tasks = n_threads;
17410
17424
} else {
17411
- n_tasks = MIN(p-> n_tasks, n_threads);
17425
+ n_tasks = MIN(p. n_tasks, n_threads);
17412
17426
}
17413
17427
} break;
17414
17428
case GGML_OP_CROSS_ENTROPY_LOSS:
0 commit comments