Skip to content

Commit b143532

Browse files
jukofyork and MagnusS0 authored and committed
Control vector loading fixes (ggml-org#8137)
* Fixed leak in llama_control_vector_load_one() and allow llama_control_vector_load() to grow
* refactored `llama_control_vector_load_one()`
* allow multiple directions for same layer in same file
* llama_control_vector_load_one() and llama_control_vector_load() now break on error
* removed unnecessary ggml_free() call
1 parent 8f0162a commit b143532

File tree

1 file changed

+74
-108
lines changed

1 file changed

+74
-108
lines changed

common/common.cpp

Lines changed: 74 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -2804,125 +2804,87 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
28042804
//
28052805

28062806
// Load a single control-vector GGUF file and scale it by load_info.strength.
//
// The file is expected to contain 1-D F32 tensors named "direction.<layer>",
// where <layer> is a 1-based layer index (layer 0 is rejected — it is never
// applied). Multiple tensors for the same layer are summed, which allows a
// single file to carry several directions per layer.
//
// Returns a llama_control_vector_data whose .data holds n_embd floats per
// layer, laid out contiguously starting at layer 1 (layer 1 at offset 0).
// On any error, n_embd is set to -1 and .data is cleared; callers must check
// n_embd before using the result. The gguf/ggml contexts are always freed
// before returning (no leak on the error paths).
static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
    llama_control_vector_data result = { -1, {} };

    // Load metadata AND tensor data in one pass (no_alloc = false); ctx owns
    // the tensor buffers and is freed at the single exit point below.
    ggml_context * ctx = nullptr;
    struct gguf_init_params meta_gguf_params = {
        /* .no_alloc = */ false,
        /* .ctx      = */ &ctx,
    };
    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
    if (!ctx_gguf) {
        fprintf(stderr, "%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
        return result;
    }

    int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
    if (n_tensors == 0) {
        // Not fatal: falls through with n_embd still -1, reported below.
        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
    }

    for (int i = 0; i < n_tensors; i++) {
        std::string name = gguf_get_tensor_name(ctx_gguf, i);

        int layer_idx = -1;

        // split on '.' — expected tensor name format is "direction.<layer>"
        size_t dotpos = name.find('.');
        if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
            try {
                layer_idx = std::stoi(name.substr(dotpos + 1));
            } catch (...) {
                // non-numeric suffix — treated as unparsable below
                layer_idx = -1;
            }
        }
        if (layer_idx < 0) {
            fprintf(stderr, "%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        } else if (layer_idx == 0) {
            // layer 0 is the embedding layer — control vectors never apply there
            fprintf(stderr, "%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
        if (tensor->type != GGML_TYPE_F32) {
            fprintf(stderr, "%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }
        if (ggml_n_dims(tensor) != 1) {
            fprintf(stderr, "%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        // first direction tensor fixes n_embd; all others must match it
        if (result.n_embd == -1) {
            result.n_embd = ggml_nelements(tensor);
        } else if (ggml_nelements(tensor) != result.n_embd) {
            fprintf(stderr, "%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        // extend if necessary - do not store data for layer 0 (it's not used)
        result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);

        const float * src = (const float *) tensor->data;
        float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
        for (int j = 0; j < result.n_embd; j++) {
            dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file
        }
    }

    if (result.n_embd == -1) {
        fprintf(stderr, "%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
        result.data.clear();
    }

    // single exit point: both contexts are freed on every path through here
    gguf_free(ctx_gguf);
    ggml_free(ctx);

    return result;
}
29282890

@@ -2933,24 +2895,28 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
29332895
auto cur = llama_control_vector_load_one(info);
29342896

29352897
if (cur.n_embd == -1) {
2936-
return result;
2898+
result.n_embd = -1;
2899+
break;
29372900
}
2938-
if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
2939-
fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
2940-
return result;
2901+
if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
2902+
fprintf(stderr, "%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
2903+
result.n_embd = -1;
2904+
break;
29412905
}
29422906

29432907
if (result.n_embd == -1) {
29442908
result = std::move(cur);
29452909
} else {
2910+
result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f); // extend if necessary
29462911
for (size_t i = 0; i < cur.data.size(); i++) {
29472912
result.data[i] += cur.data[i];
29482913
}
29492914
}
29502915
}
29512916

29522917
if (result.n_embd == -1) {
2953-
fprintf(stderr, "%s: no vectors passed\n", __func__);
2918+
fprintf(stderr, "%s: no valid control vector files passed\n", __func__);
2919+
result.data.clear();
29542920
}
29552921

29562922
return result;

0 commit comments

Comments (0)