@@ -2804,125 +2804,87 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
//

static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
-    int32_t n_tensors;
-
-    size_t n_bytes = 0;
-
-    uint32_t max_direction_layer = 0;
-
    llama_control_vector_data result = { -1, {} };

-    // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
-    {
-        struct ggml_init_params meta_params = {
-            /* .mem_size   = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
-            /* .mem_buffer = */ nullptr,
-            /* .no_alloc   = */ true,
-        };
-        ggml_context * meta_ctx = ggml_init(meta_params);
-        struct gguf_init_params meta_gguf_params = {
-            /* .no_alloc = */ true,
-            /* .ctx      = */ &meta_ctx,
-        };
-        struct gguf_context * meta_ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
-        if (!meta_ctx_gguf) {
-            fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
-            ggml_free(meta_ctx);
-            return result;
-        }
-
-        n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
-        for (int i = 0; i < n_tensors; i++) {
-            std::string name = gguf_get_tensor_name(meta_ctx_gguf, i);
-
-            // split on '.'
-            size_t dotpos = name.find('.');
-            if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
-                try {
-                    uint32_t layer = std::stoi(name.substr(dotpos + 1));
-                    if (layer == 0) {
-                        fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                        ggml_free(meta_ctx);
-                        gguf_free(meta_ctx_gguf);
-                        return result;
-                    }
-                    if (layer > max_direction_layer) {
-                        max_direction_layer = layer;
-                    }
-                } catch (...) {
-                    fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                    ggml_free(meta_ctx);
-                    gguf_free(meta_ctx_gguf);
-                    return result;
-                }
-            }
-
-            struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
-            if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
-                fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                ggml_free(meta_ctx);
-                gguf_free(meta_ctx_gguf);
-                return result;
-            }
-            if (result.n_embd == -1) {
-                result.n_embd = ggml_nelements(tensor_meta);
-            } else if (ggml_nelements(tensor_meta) != result.n_embd) {
-                fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, load_info.fname.c_str());
-                ggml_free(meta_ctx);
-                gguf_free(meta_ctx_gguf);
-                return result;
-            }
-            n_bytes += ggml_nbytes(tensor_meta);
-        }
-        ggml_free(meta_ctx);
-        gguf_free(meta_ctx_gguf);
+    ggml_context * ctx = nullptr;
+    struct gguf_init_params meta_gguf_params = {
+        /* .no_alloc = */ false,
+        /* .ctx      = */ &ctx,
+    };
+    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
+    if (!ctx_gguf) {
+        fprintf(stderr, "%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
+        return result;
    }

+    int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
    if (n_tensors == 0) {
        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
-        return result;
    }

-    // load and scale tensors into final control vector context
-    struct ggml_init_params ggml_params = {
-        /* .mem_size   = */ ggml_tensor_overhead() * n_tensors + n_bytes,
-        /* .mem_buffer = */ nullptr,
-        /* .no_alloc   = */ false,
-    };
-    struct ggml_context * ctx = ggml_init(ggml_params);
+    for (int i = 0; i < n_tensors; i++) {
+        std::string name = gguf_get_tensor_name(ctx_gguf, i);

-    struct gguf_init_params params = {
-        /* .no_alloc = */ false,
-        /* .ctx      = */ &ctx,
-    };
-    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), params);
-    if (!ctx_gguf) {
-        fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
-        ggml_free(ctx);
-        return result;
-    }
+        int layer_idx = -1;

-    // do not store data for layer 0 (it's not used)
-    result.data.resize(result.n_embd * max_direction_layer);
+        // split on '.'
+        size_t dotpos = name.find('.');
+        if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+            try {
+                layer_idx = std::stoi(name.substr(dotpos + 1));
+            } catch (...) {
+                layer_idx = -1;
+            }
+        }
+        if (layer_idx < 0) {
+            fprintf(stderr, "%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        } else if (layer_idx == 0) {
+            fprintf(stderr, "%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }

-    for (uint32_t il = 1; il <= max_direction_layer; il++) {
-        const std::string name = "direction." + std::to_string(il);
-        const ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        if (tensor->type != GGML_TYPE_F32) {
+            fprintf(stderr, "%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+        if (ggml_n_dims(tensor) != 1) {
+            fprintf(stderr, "%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+
+        if (result.n_embd == -1) {
+            result.n_embd = ggml_nelements(tensor);
+        } else if (ggml_nelements(tensor) != result.n_embd) {
+            fprintf(stderr, "%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }

-        float * dst = result.data.data() + result.n_embd * (il - 1);
+        // extend if necessary - do not store data for layer 0 (it's not used)
+        result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);

-        if (tensor) {
-            const float * src = (const float *) tensor->data;
-            for (int j = 0; j < result.n_embd; j++) {
-                dst[j] = src[j] * load_info.strength;
-            }
-        } else {
-            for (int j = 0; j < result.n_embd; j++) {
-                dst[j] = 0.0f;
-            }
+        const float * src = (const float *) tensor->data;
+        float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
+        for (int j = 0; j < result.n_embd; j++) {
+            dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file
        }
+
    }

+    if (result.n_embd == -1) {
+        fprintf(stderr, "%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
+        result.data.clear();
+    }
+
+    gguf_free(ctx_gguf);
+    ggml_free(ctx);
+
    return result;
}

@@ -2933,24 +2895,28 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
        auto cur = llama_control_vector_load_one(info);

        if (cur.n_embd == -1) {
-            return result;
+            result.n_embd = -1;
+            break;
        }
-        if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
-            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
-            return result;
+        if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
+            fprintf(stderr, "%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
+            result.n_embd = -1;
+            break;
        }

        if (result.n_embd == -1) {
            result = std::move(cur);
        } else {
+            result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f); // extend if necessary
            for (size_t i = 0; i < cur.data.size(); i++) {
                result.data[i] += cur.data[i];
            }
        }
    }

    if (result.n_embd == -1) {
-        fprintf(stderr, "%s: no vectors passed\n", __func__);
+        fprintf(stderr, "%s: no valid control vector files passed\n", __func__);
+        result.data.clear();
    }

    return result;
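
For reference, a minimal usage sketch of the patched loaders (not part of the diff): it assumes the common.h definitions of llama_control_vector_load_info { strength, fname } and llama_control_vector_data { n_embd, data }, plus the llama_control_vector_apply API from llama.h; the file names, strengths, and layer range below are placeholder assumptions.

#include "common.h"
#include "llama.h"

#include <vector>

// Load two control vector GGUF files, scale each by its strength, sum them
// (llama_control_vector_load accumulates overlapping layers), and apply the
// combined vector to every layer of the context.
static bool apply_example_control_vectors(llama_context * lctx) {
    std::vector<llama_control_vector_load_info> infos = {
        { /* strength = */  0.8f, /* fname = */ "happy.gguf"   },  // placeholder file
        { /* strength = */ -0.4f, /* fname = */ "anxious.gguf" },  // placeholder file
    };

    const llama_control_vector_data cvec = llama_control_vector_load(infos);
    if (cvec.n_embd == -1) {
        return false;  // every file was invalid or the dimensions did not match
    }

    // cvec.data is a flat [n_layers x n_embd] buffer starting at layer 1,
    // so layer il lives at offset n_embd * (il - 1)
    const int32_t n_layers = (int32_t) (cvec.data.size() / cvec.n_embd);
    return llama_control_vector_apply(lctx, cvec.data.data(), cvec.data.size(),
                                      cvec.n_embd, 1, n_layers) == 0;
}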