@@ -13994,6 +13994,8 @@ int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const
13994
13994
}
13995
13995
13996
13996
static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model) {
13997
+ auto start = ggml_time_ms();
13998
+ fprintf(stderr, "control vector init...\n");
13997
13999
GGML_ASSERT(cvec.tensors.empty());
13998
14000
GGML_ASSERT(cvec.ctxs.empty());
13999
14001
GGML_ASSERT(cvec.bufs.empty());
@@ -14016,6 +14018,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
14016
14018
ggml_context * ctx = ggml_init(params);
14017
14019
if (!ctx) {
14018
14020
LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);
14021
+ auto end = ggml_time_ms();
14022
+ fprintf(stderr, "control vector init took %lldms\n", (long long) (end - start));
14023
+ return false;
14019
14024
return 1;
14020
14025
}
14021
14026
ctx_map[it.first] = ctx;
@@ -14036,24 +14041,33 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
14036
14041
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
14037
14042
if (!buf) {
14038
14043
LLAMA_LOG_ERROR("%s: failed to allocate buffer for control vector\n", __func__);
14044
+ auto end = ggml_time_ms();
14045
+ fprintf(stderr, "control vector init took %lldms\n", (long long) (end - start));
14046
+ return false;
14039
14047
return false;
14040
14048
}
14041
14049
ggml_backend_buffer_clear(buf, 0);
14042
14050
cvec.ctxs.push_back(ctx);
14043
14051
cvec.bufs.push_back(buf);
14044
14052
}
14045
14053
14054
+ auto end = ggml_time_ms();
14055
+ fprintf(stderr, "control vector init took %lldms\n", (long long) (end - start));
14046
14056
return true;
14047
14057
}
14048
14058
14049
14059
int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end) {
14060
+ auto start = ggml_time_ms();
14061
+ printf("control vector apply...\n");
14050
14062
const llama_model & model = lctx->model;
14051
14063
llama_control_vector & cvec = lctx->cvec;
14052
14064
14053
14065
if (data == nullptr) {
14054
14066
// disable the current control vector (but leave allocated for later)
14055
14067
cvec.layer_start = -1;
14056
14068
cvec.layer_end = -1;
14069
+ auto end = ggml_time_ms();
14070
+ printf("control vector apply took %lldms\n", (long long) (end - start));
14057
14071
return 0;
14058
14072
}
14059
14073
@@ -14064,6 +14078,7 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
14064
14078
14065
14079
if (cvec.tensors.empty()) {
14066
14080
if (!llama_control_vector_init(cvec, model)) {
14081
+ LLAMA_LOG_ERROR("%s: control vector init failed\n", __func__);
14067
14082
return 1;
14068
14083
}
14069
14084
}
@@ -14080,6 +14095,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
14080
14095
}
14081
14096
}
14082
14097
14098
+ auto end = ggml_time_ms();
14099
+ printf("control vector apply took %lldms\n", (long long) (end - start));
14083
14100
return 0;
14084
14101
}
14085
14102
0 commit comments