@@ -2829,125 +2829,87 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 //
 
 static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
-    int32_t n_tensors;
-
-    size_t n_bytes = 0;
-
-    uint32_t max_direction_layer = 0;
-
     llama_control_vector_data result = { -1, {} };
 
-    // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
-    {
-        struct ggml_init_params meta_params = {
-            /* .mem_size   = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
-            /* .mem_buffer = */ nullptr,
-            /* .no_alloc   = */ true,
-        };
-        ggml_context * meta_ctx = ggml_init(meta_params);
-        struct gguf_init_params meta_gguf_params = {
-            /* .no_alloc = */ true,
-            /* .ctx      = */ &meta_ctx,
-        };
-        struct gguf_context * meta_ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
-        if (!meta_ctx_gguf) {
-            fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
-            ggml_free(meta_ctx);
-            return result;
-        }
-
-        n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
-        for (int i = 0; i < n_tensors; i++) {
-            std::string name = gguf_get_tensor_name(meta_ctx_gguf, i);
-
-            // split on '.'
-            size_t dotpos = name.find('.');
-            if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
-                try {
-                    uint32_t layer = std::stoi(name.substr(dotpos + 1));
-                    if (layer == 0) {
-                        fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                        ggml_free(meta_ctx);
-                        gguf_free(meta_ctx_gguf);
-                        return result;
-                    }
-                    if (layer > max_direction_layer) {
-                        max_direction_layer = layer;
-                    }
-                } catch (...) {
-                    fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                    ggml_free(meta_ctx);
-                    gguf_free(meta_ctx_gguf);
-                    return result;
-                }
-            }
-
-            struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
-            if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
-                fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
-                ggml_free(meta_ctx);
-                gguf_free(meta_ctx_gguf);
-                return result;
-            }
-            if (result.n_embd == -1) {
-                result.n_embd = ggml_nelements(tensor_meta);
-            } else if (ggml_nelements(tensor_meta) != result.n_embd) {
-                fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, load_info.fname.c_str());
-                ggml_free(meta_ctx);
-                gguf_free(meta_ctx_gguf);
-                return result;
-            }
-            n_bytes += ggml_nbytes(tensor_meta);
-        }
-        ggml_free(meta_ctx);
-        gguf_free(meta_ctx_gguf);
+    ggml_context * ctx = nullptr;
+    struct gguf_init_params meta_gguf_params = {
+        /* .no_alloc = */ false,
+        /* .ctx      = */ &ctx,
+    };
+    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
+    if (!ctx_gguf) {
+        fprintf(stderr, "%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
+        return result;
     }
 
+    int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
     if (n_tensors == 0) {
         fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
-        return result;
     }
 
-    // load and scale tensors into final control vector context
-    struct ggml_init_params ggml_params = {
-        /* .mem_size   = */ ggml_tensor_overhead() * n_tensors + n_bytes,
-        /* .mem_buffer = */ nullptr,
-        /* .no_alloc   = */ false,
-    };
-    struct ggml_context * ctx = ggml_init(ggml_params);
+    for (int i = 0; i < n_tensors; i++) {
+        std::string name = gguf_get_tensor_name(ctx_gguf, i);
 
-    struct gguf_init_params params = {
-        /* .no_alloc = */ false,
-        /* .ctx      = */ &ctx,
-    };
-    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), params);
-    if (!ctx_gguf) {
-        fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
-        ggml_free(ctx);
-        return result;
-    }
+        int layer_idx = -1;
 
-    // do not store data for layer 0 (it's not used)
-    result.data.resize(result.n_embd * max_direction_layer);
+        // split on '.'
+        size_t dotpos = name.find('.');
+        if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+            try {
+                layer_idx = std::stoi(name.substr(dotpos + 1));
+            } catch (...) {
+                layer_idx = -1;
+            }
+        }
+        if (layer_idx < 0) {
+            fprintf(stderr, "%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        } else if (layer_idx == 0) {
+            fprintf(stderr, "%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
 
-    for (uint32_t il = 1; il <= max_direction_layer; il++) {
-        const std::string name = "direction." + std::to_string(il);
-        const ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        if (tensor->type != GGML_TYPE_F32) {
+            fprintf(stderr, "%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+        if (ggml_n_dims(tensor) != 1) {
+            fprintf(stderr, "%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+
+        if (result.n_embd == -1) {
+            result.n_embd = ggml_nelements(tensor);
+        } else if (ggml_nelements(tensor) != result.n_embd) {
+            fprintf(stderr, "%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
 
-        float * dst = result.data.data() + result.n_embd * (il - 1);
+        // extend if necessary - do not store data for layer 0 (it's not used)
+        result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);
 
-        if (tensor) {
-            const float * src = (const float *) tensor->data;
-            for (int j = 0; j < result.n_embd; j++) {
-                dst[j] = src[j] * load_info.strength;
-            }
-        } else {
-            for (int j = 0; j < result.n_embd; j++) {
-                dst[j] = 0.0f;
-            }
+        const float * src = (const float *) tensor->data;
+        float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
+        for (int j = 0; j < result.n_embd; j++) {
+            dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file
         }
+
     }
 
+    if (result.n_embd == -1) {
+        fprintf(stderr, "%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
+        result.data.clear();
+    }
+
+    gguf_free(ctx_gguf);
+    ggml_free(ctx);
+
     return result;
 }
 
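For reference, the refactored loader above flattens every direction into a single buffer: `n_embd` floats per layer, with layer 1 at offset 0 and layer `il` at offset `n_embd * (il - 1)` (layer 0 is never stored). A minimal sketch of indexing that layout follows; the struct mirrors `llama_control_vector_data` as it is used in the diff, and the helper name is hypothetical, not part of the commit.

```cpp
#include <cstddef>
#include <vector>

// Mirror of llama_control_vector_data as used in the diff above
// (assumption: the real struct in common.h has these two members).
struct control_vector_data_sketch {
    int n_embd = -1;          // elements per layer direction, -1 if invalid
    std::vector<float> data;  // flattened: layer il (1-based) starts at data[n_embd * (il - 1)]
};

// Hypothetical helper: pointer to the direction for 1-based layer il,
// or nullptr if that layer has no stored direction.
static const float * layer_direction(const control_vector_data_sketch & cv, int il) {
    if (cv.n_embd <= 0 || il <= 0) {
        return nullptr;
    }
    const size_t off = (size_t) cv.n_embd * (size_t) (il - 1);
    if (off + (size_t) cv.n_embd > cv.data.size()) {
        return nullptr; // file did not contain a direction for this layer
    }
    return cv.data.data() + off;
}
```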
@@ -2958,24 +2920,28 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
         auto cur = llama_control_vector_load_one(info);
 
         if (cur.n_embd == -1) {
-            return result;
+            result.n_embd = -1;
+            break;
         }
-        if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
-            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
-            return result;
+        if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
+            fprintf(stderr, "%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
+            result.n_embd = -1;
+            break;
         }
 
         if (result.n_embd == -1) {
             result = std::move(cur);
         } else {
+            result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f); // extend if necessary
             for (size_t i = 0; i < cur.data.size(); i++) {
                 result.data[i] += cur.data[i];
             }
         }
     }
 
     if (result.n_embd == -1) {
-        fprintf(stderr, "%s: no vectors passed\n", __func__);
+        fprintf(stderr, "%s: no valid control vector files passed\n", __func__);
+        result.data.clear();
     }
 
     return result;
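For context, here is a hedged usage sketch of the summing loader in this second hunk. The `{ strength, fname }` member order of `llama_control_vector_load_info` is assumed from the field accesses in the diff, the include path is assumed, and the file names are made up.

```cpp
#include <cstdio>
#include <string>
#include <vector>

#include "common.h" // assumed to declare llama_control_vector_load{,_info,_data}

int load_control_vectors_example() {
    // Each entry is one GGUF control vector file plus a scale factor;
    // llama_control_vector_load() sums the scaled directions per layer.
    std::vector<llama_control_vector_load_info> infos = {
        {  0.8f, "happy.gguf" },  // hypothetical file
        { -0.4f, "angry.gguf" },  // hypothetical file, subtracted via negative strength
    };

    llama_control_vector_data cvec = llama_control_vector_load(infos);
    if (cvec.n_embd == -1) {
        fprintf(stderr, "no valid control vectors loaded\n");
        return 1;
    }

    // data holds n_embd floats per layer, layer 1 first (layer 0 is never stored)
    printf("loaded %zu layer directions of dim %d\n",
           cvec.data.size() / (size_t) cvec.n_embd, cvec.n_embd);
    return 0;
}
```

Because each file's contribution is scaled by `strength` and accumulated rather than overwritten, a negative strength steers away from a direction while still combining cleanly with vectors loaded from other files.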