
Commit bd9f6b9

log time measurements
1 parent d0304f7 commit bd9f6b9

File tree

2 files changed: +27 -3 lines changed

common/common.cpp
llama.cpp

common/common.cpp

Lines changed: 10 additions & 3 deletions
@@ -2640,6 +2640,8 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 //
 
 static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
+    auto start = ggml_time_ms();
+    printf("control vector load_one...\n");
     int32_t n_tensors;
 
     size_t n_bytes = 0;
@@ -2684,7 +2686,6 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
                 fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
                 gguf_free(meta_ctx_gguf);
                 ggml_free(meta_ctx);
-                return result;
             }
         }
 
@@ -2751,10 +2752,14 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
     gguf_free(ctx_gguf);
     ggml_free(ctx);
 
+    auto end = ggml_time_ms();
+    printf("control vector load_one took %ums\n", end - start);
     return result;
 }
 
 llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos) {
+    auto start = ggml_time_ms();
+    printf("control vector load...\n");
     llama_control_vector_data result = { -1, {} };
 
     for (const auto & info : load_infos) {
@@ -2764,7 +2769,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
             return result;
         }
         if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
-            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
+            printf("%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
             return result;
         }
 
@@ -2778,8 +2783,10 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
     }
 
     if (result.n_embd == -1) {
-        fprintf(stderr, "%s: no vectors passed\n", __func__);
+        printf("%s: no vectors passed\n", __func__);
     }
 
+    auto end = ggml_time_ms();
+    printf("control vector load time: %ums\n", end-start);
     return result;
 }
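
A note on the measurement pattern: ggml_time_ms() is declared in ggml.h as returning int64_t (and it relies on a one-time ggml_time_init() call, which llama_backend_init() performs), so the "%u" specifier in the added printf calls does not match the type of end - start. A minimal standalone sketch of the same bracketing pattern with a matching specifier (timed_load is a placeholder name, not part of the commit):

// Sketch only: brackets a region with ggml_time_ms() and prints the delta.
#include <cinttypes> // PRId64
#include <cstdio>

#include "ggml.h"

void timed_load() {
    const int64_t start = ggml_time_ms(); // assumes ggml_time_init() already ran

    // ... work being measured, e.g. reading a control vector file ...

    const int64_t end = ggml_time_ms();
    printf("load took %" PRId64 "ms\n", end - start);
}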

llama.cpp

Lines changed: 17 additions & 0 deletions
@@ -13994,6 +13994,8 @@ int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const
 }
 
 static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model) {
+    auto start = ggml_time_ms();
+    fprintf(stderr, "control vector init...\n");
     GGML_ASSERT(cvec.tensors.empty());
     GGML_ASSERT(cvec.ctxs.empty());
     GGML_ASSERT(cvec.bufs.empty());
@@ -14016,6 +14018,9 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
         ggml_context * ctx = ggml_init(params);
         if (!ctx) {
             LLAMA_LOG_ERROR("%s: failed to allocate context for control vector\n", __func__);
+            auto end = ggml_time_ms();
+            fprintf(stderr, "control vector init took %ums\n", end - start);
+            return true;
             return 1;
         }
         ctx_map[it.first] = ctx;
@@ -14036,24 +14041,33 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
         if (!buf) {
             LLAMA_LOG_ERROR("%s: failed to allocate buffer for control vector\n", __func__);
+            auto end = ggml_time_ms();
+            fprintf(stderr, "control vector init took %ums\n", end - start);
+            return true;
             return false;
         }
         ggml_backend_buffer_clear(buf, 0);
         cvec.ctxs.push_back(ctx);
         cvec.bufs.push_back(buf);
     }
 
+    auto end = ggml_time_ms();
+    fprintf(stderr, "control vector init took %ums\n", end - start);
     return true;
 }
 
 int32_t llama_control_vector_apply(struct llama_context * lctx, const float * data, size_t len, int32_t n_embd, int32_t il_start, int32_t il_end) {
+    auto start = ggml_time_ms();
+    printf("control vector apply...\n");
     const llama_model & model = lctx->model;
     llama_control_vector & cvec = lctx->cvec;
 
     if (data == nullptr) {
         // disable the current control vector (but leave allocated for later)
         cvec.layer_start = -1;
         cvec.layer_end = -1;
+        auto end = ggml_time_ms();
+        printf("control vector apply took %ums\n", end - start);
         return 0;
     }
 
@@ -14064,6 +14078,7 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
 
     if (cvec.tensors.empty()) {
         if (!llama_control_vector_init(cvec, model)) {
+            LLAMA_LOG_ERROR("%s: control vector init failed\n", __func__);
             return 1;
         }
     }
@@ -14080,6 +14095,8 @@ int32_t llama_control_vector_apply(struct llama_context * lctx, const float * da
         }
     }
 
+    auto end = ggml_time_ms();
+    printf("control vector apply took %ums\n", end - start);
     return 0;
 }
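The elapsed-time logging above has to be repeated before every early return (three copies in llama_control_vector_init alone). Not part of the commit, but a hypothetical RAII helper would log once on every exit path; scoped_timer_ms below is an invented name, not an existing llama.cpp type:

// Sketch only: the destructor fires on every return path of the enclosing scope.
#include <cinttypes> // PRId64
#include <cstdio>

#include "ggml.h"

struct scoped_timer_ms {
    const char * name;
    int64_t      t_start;

    explicit scoped_timer_ms(const char * name) : name(name), t_start(ggml_time_ms()) {}

    ~scoped_timer_ms() {
        fprintf(stderr, "%s took %" PRId64 "ms\n", name, ggml_time_ms() - t_start);
    }
};

// Usage sketch: one declaration at the top of a function would replace the
// start/end/printf triples added in the diff above.
//
//     static bool llama_control_vector_init(struct llama_control_vector & cvec, const llama_model & model) {
//         scoped_timer_ms timer("control vector init");
//         ...
//     }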