@@ -13994,6 +13994,8 @@ int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const
13994
13994
}
13995
13995
13996
13996
static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model) {
13997
+ auto start = ggml_time_ms();
13998
+ fprintf(stderr, "control vector init...\n");
13997
13999
GGML_ASSERT(cvec.tensors.empty());
13998
14000
GGML_ASSERT(cvec.ctxs.empty());
13999
14001
GGML_ASSERT(cvec.bufs.empty());
@@ -14016,6 +14018,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
14016
14018
ggml_context * ctx = ggml_init(params);
14017
14019
if (!ctx) {
14018
14020
LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);
14021
+ auto end = ggml_time_ms();
14022
+ fprintf(stderr, "control vector init took %lldms\n", (long long) (end - start));
14023
+ return false;
14019
14024
return 1;
14020
14025
}
14021
14026
ctx_map[it.first] = ctx;
@@ -14036,24 +14041,33 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
14036
14041
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
14037
14042
if (!buf) {
14038
14043
LLAMA_LOG_ERROR("%s: failed to allocate buffer for control vector\n", __func__);
14044
+ auto end = ggml_time_ms();
14045
+ fprintf(stderr, "control vector init took %lldms\n", (long long) (end - start));
14046
+ return false;
14039
14047
return false;
14040
14048
}
14041
14049
ggml_backend_buffer_clear(buf, 0);
14042
14050
cvec.ctxs.push_back(ctx);
14043
14051
cvec.bufs.push_back(buf);
14044
14052
}
14045
14053
14054
+ auto end = ggml_time_ms();
14055
+ fprintf(stderr, "control vector init took %lldms\n", (long long) (end - start));
14046
14056
return true;
14047
14057
}
14048
14058
14049
14059
int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end) {
14060
+ auto start = ggml_time_ms();
14061
+ printf("control vector apply...\n");
14050
14062
const llama_model & model = lctx->model;
14051
14063
llama_control_vector & cvec = lctx->cvec;
14052
14064
14053
14065
if (data == nullptr) {
14054
14066
// disable the current control vector (but leave allocated for later)
14055
14067
cvec.layer_start = -1;
14056
14068
cvec.layer_end = -1;
14069
+ auto end = ggml_time_ms();
14070
+ printf("control vector apply took %lldms\n", (long long) (end - start));
14057
14071
return 0;
14058
14072
}
14059
14073
@@ -14064,6 +14078,7 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
14064
14078
14065
14079
if (cvec.tensors.empty()) {
14066
14080
if (!llama_control_vector_init(cvec, model)) {
14081
+ LLAMA_LOG_ERROR("%s: control vector init failed\n", __func__);
14067
14082
return 1;
14068
14083
}
14069
14084
}
@@ -14080,6 +14095,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
14080
14095
}
14081
14096
}
14082
14097
14098
+ auto end = ggml_time_ms();
14099
+ printf("control vector apply took %lldms\n", (long long) (end - start));
14083
14100
return 0;
14084
14101
}
14085
14102
0 commit comments