Skip to content

Commit 0a6933f

Browse files
jukofyork authored and Nexesenex committed
Control vector loading fixes (ggml-org#8137)
* Fixed leak in llama_control_vector_load_one() and allow llama_control_vector_load() to grow * refactored `llama_control_vector_load_one()` * allow multiple directions for same layer in same file * llama_control_vector_load_one() and llama_control_vector_load() now break on error * removed unnecessary ggml_free() call
1 parent 828c6b1 commit 0a6933f

File tree

1 file changed

+74
-108
lines changed

1 file changed

+74
-108
lines changed

common/common.cpp

Lines changed: 74 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -2829,125 +2829,87 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
28292829
//
28302830

28312831
// Load a single control-vector GGUF file.
//
// Every tensor named "direction.<layer>" (layer >= 1) is validated to be a 1-D
// F32 tensor, scaled by load_info.strength, and *accumulated* into
// result.data at slot (layer - 1) — so multiple direction tensors for the same
// layer in one file add together.  result.data grows on demand; layer 0 is
// rejected (it is never used).
//
// On any invalid tensor the whole file is rejected: result.n_embd is set to -1
// and result.data is cleared.  The gguf/ggml contexts are always freed before
// returning (no leak on the error paths).
static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
    llama_control_vector_data result = { -1, {} };

    ggml_context * ctx = nullptr;
    struct gguf_init_params meta_gguf_params = {
        /* .no_alloc = */ false, // load tensor data too, not just metadata
        /* .ctx      = */ &ctx,
    };
    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
    if (!ctx_gguf) {
        fprintf(stderr, "%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
        return result;
    }

    int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
    if (n_tensors == 0) {
        // not fatal on its own: result.n_embd stays -1 and the "skipping" path below runs
        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
    }

    for (int i = 0; i < n_tensors; i++) {
        std::string name = gguf_get_tensor_name(ctx_gguf, i);

        int layer_idx = -1;

        // expect names of the form "direction.<layer>"; split on '.'
        size_t dotpos = name.find('.');
        if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
            try {
                layer_idx = std::stoi(name.substr(dotpos + 1));
            } catch (...) {
                layer_idx = -1;
            }
        }
        if (layer_idx < 0) {
            fprintf(stderr, "%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        } else if (layer_idx == 0) {
            // layer 0 data is never applied, so a "direction.0" tensor is an authoring error
            fprintf(stderr, "%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
        if (!tensor) {
            // should not happen for a name reported by gguf, but guard the dereferences below
            fprintf(stderr, "%s: direction tensor missing from ggml context in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }
        if (tensor->type != GGML_TYPE_F32) {
            fprintf(stderr, "%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }
        if (ggml_n_dims(tensor) != 1) {
            fprintf(stderr, "%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        // first direction tensor fixes n_embd; all later ones must match it
        if (result.n_embd == -1) {
            result.n_embd = ggml_nelements(tensor);
        } else if (ggml_nelements(tensor) != result.n_embd) {
            fprintf(stderr, "%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        // extend if necessary - do not store data for layer 0 (it's not used)
        result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd) * layer_idx), 0.0f);

        const float * src = (const float *) tensor->data;
        float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
        for (int j = 0; j < result.n_embd; j++) {
            dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file
        }
    }

    if (result.n_embd == -1) {
        fprintf(stderr, "%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
        result.data.clear();
    }

    // always free both contexts, on success and on error
    gguf_free(ctx_gguf);
    ggml_free(ctx);

    return result;
}
29532915

@@ -2958,24 +2920,28 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
29582920
auto cur = llama_control_vector_load_one(info);
29592921

29602922
if (cur.n_embd == -1) {
2961-
return result;
2923+
result.n_embd = -1;
2924+
break;
29622925
}
2963-
if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
2964-
fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
2965-
return result;
2926+
if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
2927+
fprintf(stderr, "%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
2928+
result.n_embd = -1;
2929+
break;
29662930
}
29672931

29682932
if (result.n_embd == -1) {
29692933
result = std::move(cur);
29702934
} else {
2935+
result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f); // extend if necessary
29712936
for (size_t i = 0; i < cur.data.size(); i++) {
29722937
result.data[i] += cur.data[i];
29732938
}
29742939
}
29752940
}
29762941

29772942
if (result.n_embd == -1) {
2978-
fprintf(stderr, "%s: no vectors passed\n", __func__);
2943+
fprintf(stderr, "%s: no valid control vector files passed\n", __func__);
2944+
result.data.clear();
29792945
}
29802946

29812947
return result;

0 commit comments

Comments (0)