From 2b276756ee603a481f5f78c039381f826b9171b0 Mon Sep 17 00:00:00 2001
From: jukofyork <69222624+jukofyork@users.noreply.github.com>
Date: Wed, 26 Jun 2024 14:45:00 +0100
Subject: [PATCH 1/5] Fixed leak in llama_control_vector_load_one() and allow
 llama_control_vector_load() to grow

---
 common/common.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index c76d0e2c33be5..6ffa31bd10771 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2923,6 +2923,8 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
         }
     }
 
+    ggml_free(ctx);
+    gguf_free(ctx_gguf);
     return result;
 }
 
@@ -2933,16 +2935,17 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
[...]

From: jukofyork <69222624+jukofyork@users.noreply.github.com>
Date: Wed, 26 Jun 2024 16:45:21 +0100
Subject: [PATCH 2/5] refactored `llama_control_vector_load_one()`

---
 common/common.cpp | 160 ++++++++++++++++------------------------
 1 file changed, 56 insertions(+), 104 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 6ffa31bd10771..dfa1bace56401 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2804,127 +2804,79 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 //
 static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
-    int32_t n_tensors;
-
-    size_t n_bytes = 0;
-
-    uint32_t max_direction_layer = 0;
 
     llama_control_vector_data result = { -1, {} };
 
-    // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
-    {
-        struct ggml_init_params meta_params = {
-            /* .mem_size   = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
-            /* .mem_buffer = */ nullptr,
-            /* .no_alloc   = */ true,
-        };
-        ggml_context * meta_ctx = ggml_init(meta_params);
-        struct gguf_init_params meta_gguf_params = {
-            /* .no_alloc = */ true,
-            /* .ctx      = */ &meta_ctx,
-        };
-        struct gguf_context * meta_ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
-        if (!meta_ctx_gguf) {
-            fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
-            ggml_free(meta_ctx);
-            return result;
-        }
-
-        n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
-        for (int i = 0; i < n_tensors; i++) {
-            std::string name = gguf_get_tensor_name(meta_ctx_gguf, i);
-
-            // split on '.'
-            size_t dotpos = name.find('.');
-            if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
-                try {
-                    uint32_t layer = std::stoi(name.substr(dotpos + 1));
-                    if (layer == 0) {
-                        fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                        ggml_free(meta_ctx);
-                        gguf_free(meta_ctx_gguf);
-                        return result;
-                    }
-                    if (layer > max_direction_layer) {
-                        max_direction_layer = layer;
-                    }
-                } catch (...) {
-                    fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                    ggml_free(meta_ctx);
-                    gguf_free(meta_ctx_gguf);
-                    return result;
-                }
-            }
-
-            struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
-            if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
-                fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                ggml_free(meta_ctx);
-                gguf_free(meta_ctx_gguf);
-                return result;
-            }
-            if (result.n_embd == -1) {
-                result.n_embd = ggml_nelements(tensor_meta);
-            } else if (ggml_nelements(tensor_meta) != result.n_embd) {
-                fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, load_info.fname.c_str());
-                ggml_free(meta_ctx);
-                gguf_free(meta_ctx_gguf);
-                return result;
-            }
-            n_bytes += ggml_nbytes(tensor_meta);
-        }
-        ggml_free(meta_ctx);
-        gguf_free(meta_ctx_gguf);
-    }
-
-    if (n_tensors == 0) {
-        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
-        return result;
-    }
-
-    // load and scale tensors into final control vector context
-    struct ggml_init_params ggml_params = {
-        /* .mem_size   = */ ggml_tensor_overhead() * n_tensors + n_bytes,
-        /* .mem_buffer = */ nullptr,
-        /* .no_alloc   = */ false,
+    ggml_context * ctx = nullptr;
+    struct gguf_init_params meta_gguf_params = {
+        /* .no_alloc = */ false,
+        /* .ctx      = */ &ctx,
     };
-    struct ggml_context * ctx = ggml_init(ggml_params);
-
-    struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx,
-    };
-    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), params);
+    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
     if (!ctx_gguf) {
         fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
         ggml_free(ctx);
         return result;
     }
 
-    // do not store data for layer 0 (it's not used)
-    result.data.resize(result.n_embd * max_direction_layer);
+    int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
+    if (n_tensors == 0) {
+        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
+    }
 
-    for (uint32_t il = 1; il <= max_direction_layer; il++) {
-        const std::string name = "direction." + std::to_string(il);
-        const ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+    for (int i = 0; i < n_tensors; i++) {
+        std::string name = gguf_get_tensor_name(ctx_gguf, i);
 
-        float * dst = result.data.data() + result.n_embd * (il - 1);
+        int layer_idx = -1;
 
-        if (tensor) {
-            const float * src = (const float *) tensor->data;
-            for (int j = 0; j < result.n_embd; j++) {
-                dst[j] = src[j] * load_info.strength;
-            }
-        } else {
-            for (int j = 0; j < result.n_embd; j++) {
-                dst[j] = 0.0f;
+        // split on '.'
+        size_t dotpos = name.find('.');
+        if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+            try {
+                layer_idx = std::stoi(name.substr(dotpos + 1));
+            } catch (...) {
+                layer_idx = -1;
             }
         }
+        if (layer_idx < 0) {
+            fprintf(stderr, "%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+            continue;
+        } else if (layer_idx == 0) {
+            fprintf(stderr, "%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+            continue;
+        }
+
+        struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        if (tensor->type != GGML_TYPE_F32) {
+            fprintf(stderr, "%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
+            continue;
+        }
+        if (ggml_n_dims(tensor) != 1) {
+            fprintf(stderr, "%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
+            continue;
+        }
+
+        if (result.n_embd == -1) {
+            result.n_embd = ggml_nelements(tensor);
+        } else if (ggml_nelements(tensor) != result.n_embd) {
+            fprintf(stderr, "%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
+            continue;
+        }
+
+        // extend if necessary - do not store data for layer 0 (it's not used)
+        result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);
+
+        const float * src = (const float *) tensor->data;
+        float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
+        for (int j = 0; j < result.n_embd; j++) {
+            dst[j] = src[j] * load_info.strength;
+        }
     }
 
-    ggml_free(ctx);
     gguf_free(ctx_gguf);
+    ggml_free(ctx);
+
     return result;
 }
 
@@ -2938,7 +2890,7 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
[...]

From: jukofyork <69222624+jukofyork@users.noreply.github.com>
Date: Wed, 26 Jun 2024 20:58:49 +0100
Subject: [PATCH 3/5] allow multiple directions for same layer in same file

---
 common/common.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/common.cpp b/common/common.cpp
index dfa1bace56401..8927a44b32a66 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2869,7 +2869,7 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
         const float * src = (const float *) tensor->data;
         float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
         for (int j = 0; j < result.n_embd; j++) {
-            dst[j] = src[j] * load_info.strength;
+            dst[j] += src[j] * load_info.strength; // allow multiple for same layer in same file
         }
     }
 

From a299709134725393e17016ddd5309c6b5e63686e Mon Sep 17 00:00:00 2001
From: jukofyork <69222624+jukofyork@users.noreply.github.com>
Date: Thu, 27 Jun 2024 08:16:07 +0100
Subject: [PATCH 4/5] llama_control_vector_load_one() and
 llama_control_vector_load() now break on error

---
 common/common.cpp | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 8927a44b32a66..33f41d44f12fc 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2804,7 +2804,6 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 //
 static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
-
     llama_control_vector_data result = { -1, {} };
 
     ggml_context * ctx = nullptr;
@@ -2814,7 +2813,7 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
     };
     struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
     if (!ctx_gguf) {
-        fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
+        fprintf(stderr, "%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
         ggml_free(ctx);
         return result;
     }
@@ -2840,27 +2839,32 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
         }
         if (layer_idx < 0) {
             fprintf(stderr, "%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
-            continue;
+            result.n_embd = -1;
+            break;
         } else if (layer_idx == 0) {
             fprintf(stderr, "%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
-            continue;
+            result.n_embd = -1;
+            break;
         }
 
         struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
         if (tensor->type != GGML_TYPE_F32) {
             fprintf(stderr, "%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
-            continue;
+            result.n_embd = -1;
+            break;
         }
         if (ggml_n_dims(tensor) != 1) {
             fprintf(stderr, "%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
-            continue;
+            result.n_embd = -1;
+            break;
        }
 
         if (result.n_embd == -1) {
             result.n_embd = ggml_nelements(tensor);
         } else if (ggml_nelements(tensor) != result.n_embd) {
             fprintf(stderr, "%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
-            continue;
+            result.n_embd = -1;
+            break;
         }
 
         // extend if necessary - do not store data for layer 0 (it's not used)
@@ -2869,11 +2873,16 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
         const float * src = (const float *) tensor->data;
         float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
         for (int j = 0; j < result.n_embd; j++) {
-            dst[j] += src[j] * load_info.strength; // allow multiple for same layer in same file
+            dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file
         }
     }
 
+    if (result.n_embd == -1) {
+        fprintf(stderr, "%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
+        result.data.clear();
+    }
+
     gguf_free(ctx_gguf);
     ggml_free(ctx);
@@ -2887,11 +2896,13 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
[...]

From: jukofyork <69222624+jukofyork@users.noreply.github.com>
Date: Thu, 27 Jun 2024 14:38:12 +0100
Subject: [PATCH 5/5] removed unnecessary ggml_free() call

---
 common/common.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/common/common.cpp b/common/common.cpp
index 33f41d44f12fc..70349ad70891c 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2814,7 +2814,6 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
     struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
     if (!ctx_gguf) {
         fprintf(stderr, "%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
-        ggml_free(ctx);
         return result;
     }
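
Reviewer note (not part of the series): below is a minimal sketch of how a caller
might drive the refactored loader, summing several control-vector files and
applying the result to a context. It assumes the llama_control_vector_load_info
field order (strength, fname) from common.h and the llama_control_vector_apply()
signature from llama.h as of this series; apply_control_vectors() and the .gguf
file names are invented for illustration.

    #include "common.h"
    #include "llama.h"

    static bool apply_control_vectors(llama_context * lctx, int32_t n_layers) {
        // each file contributes "direction.<layer>" tensors scaled by its
        // strength; entries are summed, so opposing vectors can be mixed
        std::vector<llama_control_vector_load_info> infos = {
            {  0.8f, "happy.gguf" },  // { strength, fname } (assumed field order)
            { -0.4f, "sad.gguf"   },
        };

        const llama_control_vector_data cvec = llama_control_vector_load(infos);
        if (cvec.n_embd == -1) {
            // after PATCH 4/5 a single invalid tensor fails the whole load,
            // rather than being silently skipped
            return false;
        }

        // cvec.data holds layers 1..N contiguously; layer 0 is never stored
        return llama_control_vector_apply(lctx, cvec.data.data(), cvec.data.size(),
                                          cvec.n_embd, 1, n_layers) == 0;  // assumed: 0 == success
    }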