@@ -3176,24 +3176,24 @@ int main(int argc, char ** argv) {
3176
3176
res.status = 200 ; // HTTP OK
3177
3177
};
3178
3178
3179
- const auto handle_get_control_vectors = [&ctx_server, &params](const httplib::Request & req, httplib::Response & res) {
3179
+ const auto handle_get_control_vectors = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
3180
3180
json vectors = json::array ();
3181
3181
3182
- for (const auto & vec : params.control_vectors ) {
3182
+ for (const auto & vec : ctx_server. params .control_vectors ) {
3183
3183
vectors.push_back (json {
3184
3184
{ " fname" , vec.fname },
3185
3185
{ " strength" , vec.strength }
3186
3186
});
3187
3187
}
3188
3188
json data = {
3189
3189
{ " vectors" , vectors },
3190
- { " layer_start" , params.control_vector_layer_start },
3191
- { " layer_end" , params.control_vector_layer_end }
3190
+ { " layer_start" , ctx_server. params .control_vector_layer_start },
3191
+ { " layer_end" , ctx_server. params .control_vector_layer_end }
3192
3192
};
3193
3193
res.set_content (data.dump (), " application/json; charset=utf-8" );
3194
3194
};
3195
3195
3196
- const auto handle_set_control_vectors = [&ctx_server, &res_error, &params, &handle_get_control_vectors](const httplib::Request & req, httplib::Response & res) {
3196
+ const auto handle_set_control_vectors = [&ctx_server, &res_error, &handle_get_control_vectors](const httplib::Request & req, httplib::Response & res) {
3197
3197
res.set_header (" Access-Control-Allow-Origin" , req.get_header_value (" Origin" ));
3198
3198
3199
3199
json data = json::parse (req.body );
@@ -3202,52 +3202,55 @@ int main(int argc, char ** argv) {
3202
3202
if (data.contains (" vectors" ) && data[" vectors" ].is_array ()) {
3203
3203
for (const auto &item : data[" vectors" ]) {
3204
3204
auto v = item.get <llama_control_vector_load_info>();
3205
- // std::cout << "Add vector: " << v.fname << " " << v.strength << "\n";
3205
+ std::cout << " Add vector: " << v.fname << " " << v.strength << " \n " ;
3206
3206
vec_params.push_back (v);
3207
3207
}
3208
3208
} else {
3209
3209
std::cerr << " No vectors passed\n " ;
3210
3210
res_error (res, format_error_response (" No vectors passed" , ERROR_TYPE_SERVER));
3211
3211
return ;
3212
3212
}
3213
- for (auto v : params.control_vectors ) {
3214
- // std::cout << "Subtract vector:" << v.fname << " " << v.strength << "\n";
3215
- vec_params.push_back ({ -v.strength , v.fname });
3216
- }
3217
3213
const auto cvec = llama_control_vector_load (vec_params);
3218
3214
if (cvec.n_embd == -1 ) {
3219
- // std::cerr << "Could not load control vector\n";
3215
+ std::cerr << " Could not load control vector\n " ;
3220
3216
res_error (res, format_error_response (" Could not load control vector" , ERROR_TYPE_SERVER));
3221
3217
return ;
3222
3218
}
3223
3219
3224
- if (params.control_vector_layer_start <= 0 ) {
3225
- params.control_vector_layer_start = 1 ;
3220
+ if (ctx_server. params .control_vector_layer_start <= 0 ) {
3221
+ ctx_server. params .control_vector_layer_start = 1 ;
3226
3222
}
3227
- if (params.control_vector_layer_end <= 0 ){
3228
- params.control_vector_layer_end = llama_n_layer (ctx_server.model );
3223
+ if (ctx_server. params .control_vector_layer_end <= 0 ){
3224
+ ctx_server. params .control_vector_layer_end = llama_n_layer (ctx_server.model );
3229
3225
}
3230
3226
int err = llama_control_vector_apply (ctx_server.ctx ,
3231
3227
cvec.data .data (),
3232
3228
cvec.data .size (),
3233
3229
cvec.n_embd ,
3234
- params.control_vector_layer_start ,
3235
- params.control_vector_layer_end );
3230
+ ctx_server. params .control_vector_layer_start ,
3231
+ ctx_server. params .control_vector_layer_end );
3236
3232
if (err) {
3237
3233
std::cerr << " Could not apply control vector\n " ;
3238
3234
res_error (res, format_error_response (" Could not apply control vector" , ERROR_TYPE_SERVER));
3239
3235
return ;
3240
3236
}
3241
- auto s = params.control_vectors .size ();
3242
- auto s2 = vec_params.size ();
3243
- params.control_vectors .clear ();
3244
- unsigned i = 0 ;
3237
+ ctx_server.params .control_vectors .clear ();
3245
3238
for (auto v : vec_params) {
3246
- if (i++ < s2 - s) {
3247
- // std::cout << "set vector param: " << v.fname << " " << v.strength << "\n";
3248
- params.control_vectors .push_back (v);
3249
- }
3239
+ // std::cout << "set vector param: " << v.fname << " " << v.strength << "\n";
3240
+ ctx_server.params .control_vectors .push_back (v);
3250
3241
}
3242
+
3243
+ /* std::cerr << "Maybe we need to do this initiation ritual before it werks?\n"; // No, it's still all garbled bullshit.
3244
+
3245
+ std::vector<llama_token> tmp = { llama_token_bos(ctx_server.model), llama_token_eos(ctx_server.model), };
3246
+ std::cerr << "decode, bro\n";
3247
+ llama_decode(ctx_server.ctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) ctx_server.params.n_batch), 0, 0));
3248
+ std::cerr << "clear that fucking cache\n";
3249
+ llama_kv_cache_clear(ctx_server.ctx);
3250
+ std::cerr << "symcr0nice or what\n";
3251
+ llama_synchronize(ctx_server.ctx);
3252
+ std::cerr << "time will tell\n";
3253
+ llama_reset_timings(ctx_server.ctx);*/
3251
3254
handle_get_control_vectors (req, res);
3252
3255
};
3253
3256
0 commit comments