Commit 06f9220

ggml : add I32 <-> F32 conversion
ggml-ci
1 parent fc77536

2 files changed (+21, -4 lines)

ggml.c

Lines changed: 16 additions & 1 deletion
@@ -355,6 +355,18 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
     }
 }
 
+static void ggml_i32_to_f32_row(const int32_t * x, float * y, int n) {
+    for (int i = 0; i < n; i++) {
+        y[i] = (float) x[i];
+    }
+}
+
+static void ggml_f32_to_i32_row(const float * x, int32_t * y, int n) {
+    for (int i = 0; i < n; i++) {
+        y[i] = (int32_t) x[i];
+    }
+}
+
 //
 // timing
 //
@@ -454,6 +466,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .blck_size                = 1,
         .type_size                = sizeof(int32_t),
         .is_quantized             = false,
+        .to_float                 = (ggml_to_float_t) ggml_i32_to_f32_row,
+        .from_float               = (ggml_from_float_t) ggml_f32_to_i32_row,
+        .from_float_reference     = (ggml_from_float_t) ggml_f32_to_i32_row,
     },
     [GGML_TYPE_F32] = {
         .type_name                = "f32",
@@ -469,7 +484,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .blck_size                = 1,
         .type_size                = sizeof(ggml_fp16_t),
         .is_quantized             = false,
-        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
+        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
         .from_float               = (ggml_from_float_t) ggml_fp32_to_fp16_row,
         .from_float_reference     = (ggml_from_float_t) ggml_fp32_to_fp16_row,
         .vec_dot                  = (ggml_vec_dot_t) ggml_vec_dot_f16,
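
Registering the converters in the type_traits table lets generic code that converts tensor data through .to_float / .from_float handle GGML_TYPE_I32 like any other convertible type. A rough self-contained sketch of that dispatch pattern, using hypothetical names rather than the real ggml internals:

#include <stddef.h>
#include <stdint.h>

/* Hypothetical mini traits table (names invented for this sketch; the real
 * ggml struct carries more fields): each type provides row converters to
 * and from F32, and generic code dispatches through the pointers without
 * knowing the concrete element type. */
typedef void (*to_float_t)  (const void  * x, float * y, int n);
typedef void (*from_float_t)(const float * x, void  * y, int n);

typedef struct {
    const char * type_name;
    size_t       type_size;
    to_float_t   to_float;
    from_float_t from_float;
} traits_t;

static void i32_to_f32_row(const void * x, float * y, int n) {
    const int32_t * xi = (const int32_t *) x;
    for (int i = 0; i < n; i++) {
        y[i] = (float) xi[i];
    }
}

static void f32_to_i32_row(const float * x, void * y, int n) {
    int32_t * yi = (int32_t *) y;
    for (int i = 0; i < n; i++) {
        yi[i] = (int32_t) x[i];
    }
}

/* Analogue of the new [GGML_TYPE_I32] initializer above. */
static const traits_t i32_traits = {
    .type_name  = "i32",
    .type_size  = sizeof(int32_t),
    .to_float   = i32_to_f32_row,
    .from_float = f32_to_i32_row,
};

/* Generic path: works for any entry whose traits provide to_float. */
static void row_to_f32(const traits_t * tt, const void * src, float * dst, int n) {
    tt->to_float(src, dst, n);
}

int main(void) {
    int32_t src[3] = { 3, -1, 42 };
    float   dst[3];
    row_to_f32(&i32_traits, src, dst, 3);  /* dst is now { 3.0f, -1.0f, 42.0f } */
    return 0;
}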

llama.cpp

Lines changed: 5 additions & 3 deletions
@@ -5928,18 +5928,20 @@ struct llm_build_context {
 
         // get input vectors with right size
         const size_t stride1 = n_tokens * ggml_type_size(lctx.inp_tokens->type);
-        struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
+
+        struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
         struct ggml_tensor * inp_mean = ggml_view_2d(ctx0, lctx.inp_mean, n_tokens, n_tokens, stride1, 0);
-        struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
+        struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
 
         // construct input embeddings (token, type, position)
         inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
 
         // token types are hardcoded to zero ("Sentence A")
         struct ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
         inpL = ggml_add(ctx0, inpL, type_row0);
+
         if (model.arch == LLM_ARCH_BERT) {
-            inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL);
+            inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, ggml_cast(ctx0, inp_pos, GGML_TYPE_I32)), inpL);
         }
         cb(inpL, "inp_embd", -1);
 
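
ggml_get_rows takes its row indices as an I32 tensor, while inp_pos here is a view of lctx.inp_pos, which evidently holds the positions in another type; the added ggml_cast converts the view to GGML_TYPE_I32 before the lookup, which is presumably what the new I32 <-> F32 row converters exist to support. A rough standalone sketch, with hypothetical names rather than the llama.cpp code, of what the gather-and-add amounts to element-wise:

#include <stdint.h>
#include <stdio.h>

/* Positions held as floats are cast to int32 row indices, the indices select
 * rows of the position-embedding matrix, and the selected rows are added onto
 * the token embeddings. */
#define N_EMBD   4
#define N_TOKENS 3
#define N_POS    8

int main(void) {
    /* pos_embd: position-embedding matrix, one row per position. */
    float pos_embd[N_POS][N_EMBD];
    for (int p = 0; p < N_POS; p++) {
        for (int j = 0; j < N_EMBD; j++) {
            pos_embd[p][j] = (float) p + 0.01f * (float) j;
        }
    }

    float inp_pos_f32[N_TOKENS]  = { 0.0f, 1.0f, 2.0f }; /* positions stored as F32 */
    float inpL[N_TOKENS][N_EMBD] = {{ 0.0f }};           /* token embeddings, zeroed for the sketch */

    for (int t = 0; t < N_TOKENS; t++) {
        int32_t row = (int32_t) inp_pos_f32[t];           /* the F32 -> I32 cast */
        for (int j = 0; j < N_EMBD; j++) {
            inpL[t][j] += pos_embd[row][j];               /* ggml_get_rows + ggml_add, per element */
        }
    }

    for (int t = 0; t < N_TOKENS; t++) {
        printf("token %d: %.2f %.2f %.2f %.2f\n", t,
               (double) inpL[t][0], (double) inpL[t][1],
               (double) inpL[t][2], (double) inpL[t][3]);
    }
    return 0;
}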
