Commit 06f9220

ggml : add I32 <-> F32 conversion
ggml-ci
1 parent fc77536

2 files changed (+21, -4 lines)

ggml.c

Lines changed: 16 additions & 1 deletion
@@ -355,6 +355,18 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
     }
 }
 
+static void ggml_i32_to_f32_row(const int32_t * x, float * y, int n) {
+    for (int i = 0; i < n; i++) {
+        y[i] = (float) x[i];
+    }
+}
+
+static void ggml_f32_to_i32_row(const float * x, int32_t * y, int n) {
+    for (int i = 0; i < n; i++) {
+        y[i] = (int32_t) x[i];
+    }
+}
+
 //
 // timing
 //
@@ -454,6 +466,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .blck_size                = 1,
         .type_size                = sizeof(int32_t),
         .is_quantized             = false,
+        .to_float                 = (ggml_to_float_t) ggml_i32_to_f32_row,
+        .from_float               = (ggml_from_float_t) ggml_f32_to_i32_row,
+        .from_float_reference     = (ggml_from_float_t) ggml_f32_to_i32_row,
     },
     [GGML_TYPE_F32] = {
         .type_name                = "f32",
@@ -469,7 +484,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .blck_size                = 1,
         .type_size                = sizeof(ggml_fp16_t),
         .is_quantized             = false,
-        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
+        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
         .from_float               = (ggml_from_float_t) ggml_fp32_to_fp16_row,
         .from_float_reference     = (ggml_from_float_t) ggml_fp32_to_fp16_row,
         .vec_dot                  = (ggml_vec_dot_t) ggml_vec_dot_f16,
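
Registering the converters in the type_traits table lets generic code that converts tensor data through .to_float / .from_float handle GGML_TYPE_I32 like any other convertible type. A rough self-contained sketch of that dispatch pattern, using hypothetical names rather than the real ggml internals:

#include <stddef.h>
#include <stdint.h>

/* Hypothetical mini traits table (names invented for this sketch; the real
 * ggml struct carries more fields): each type provides row converters to
 * and from F32, and generic code dispatches through the pointers without
 * knowing the concrete element type. */
typedef void (*to_float_t)  (const void  * x, float * y, int n);
typedef void (*from_float_t)(const float * x, void  * y, int n);

typedef struct {
    const char * type_name;
    size_t       type_size;
    to_float_t   to_float;
    from_float_t from_float;
} traits_t;

static void i32_to_f32_row(const void * x, float * y, int n) {
    const int32_t * xi = (const int32_t *) x;
    for (int i = 0; i < n; i++) {
        y[i] = (float) xi[i];
    }
}

static void f32_to_i32_row(const float * x, void * y, int n) {
    int32_t * yi = (int32_t *) y;
    for (int i = 0; i < n; i++) {
        yi[i] = (int32_t) x[i];
    }
}

/* Analogue of the new [GGML_TYPE_I32] initializer above. */
static const traits_t i32_traits = {
    .type_name  = "i32",
    .type_size  = sizeof(int32_t),
    .to_float   = i32_to_f32_row,
    .from_float = f32_to_i32_row,
};

/* Generic path: works for any entry whose traits provide to_float. */
static void row_to_f32(const traits_t * tt, const void * src, float * dst, int n) {
    tt->to_float(src, dst, n);
}

int main(void) {
    int32_t src[3] = { 3, -1, 42 };
    float   dst[3];
    row_to_f32(&i32_traits, src, dst, 3);  /* dst is now { 3.0f, -1.0f, 42.0f } */
    return 0;
}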

llama.cpp

Lines changed: 5 additions & 3 deletions
@@ -5928,18 +5928,20 @@ struct llm_build_context {
 
         // get input vectors with right size
         const size_t stride1 = n_tokens * ggml_type_size(lctx.inp_tokens->type);
-        struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
+
+        struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
         struct ggml_tensor * inp_mean = ggml_view_2d(ctx0, lctx.inp_mean, n_tokens, n_tokens, stride1, 0);
-        struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
+        struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
 
         // construct input embeddings (token, type, position)
         inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
 
         // token types are hardcoded to zero ("Sentence A")
         struct ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
         inpL = ggml_add(ctx0, inpL, type_row0);
+
         if (model.arch == LLM_ARCH_BERT) {
-            inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL);
+            inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, ggml_cast(ctx0, inp_pos, GGML_TYPE_I32)), inpL);
         }
         cb(inpL, "inp_embd", -1);
 
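
ggml_get_rows takes its row indices as an I32 tensor, while inp_pos here is a view of lctx.inp_pos, which evidently holds the positions in another type; the added ggml_cast converts the view to GGML_TYPE_I32 before the lookup, which is presumably what the new I32 <-> F32 row converters exist to support. A rough standalone sketch, with hypothetical names rather than the llama.cpp code, of what the gather-and-add amounts to element-wise:

#include <stdint.h>
#include <stdio.h>

/* Positions held as floats are cast to int32 row indices, the indices select
 * rows of the position-embedding matrix, and the selected rows are added onto
 * the token embeddings. */
#define N_EMBD   4
#define N_TOKENS 3
#define N_POS    8

int main(void) {
    /* pos_embd: position-embedding matrix, one row per position. */
    float pos_embd[N_POS][N_EMBD];
    for (int p = 0; p < N_POS; p++) {
        for (int j = 0; j < N_EMBD; j++) {
            pos_embd[p][j] = (float) p + 0.01f * (float) j;
        }
    }

    float inp_pos_f32[N_TOKENS]  = { 0.0f, 1.0f, 2.0f }; /* positions stored as F32 */
    float inpL[N_TOKENS][N_EMBD] = {{ 0.0f }};           /* token embeddings, zeroed for the sketch */

    for (int t = 0; t < N_TOKENS; t++) {
        int32_t row = (int32_t) inp_pos_f32[t];           /* the F32 -> I32 cast */
        for (int j = 0; j < N_EMBD; j++) {
            inpL[t][j] += pos_embd[row][j];               /* ggml_get_rows + ggml_add, per element */
        }
    }

    for (int t = 0; t < N_TOKENS; t++) {
        printf("token %d: %.2f %.2f %.2f %.2f\n", t,
               (double) inpL[t][0], (double) inpL[t][1],
               (double) inpL[t][2], (double) inpL[t][3]);
    }
    return 0;
}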
