@@ -1891,6 +1891,7 @@ struct server_context {
     float slot_prompt_similarity = 0.0f;
 
     common_chat_templates_ptr chat_templates;
+    oaicompat_parser_options oai_parser_opt;
 
     ~server_context() {
         mtmd_free(mctx);
@@ -2086,6 +2087,15 @@ struct server_context {
         }
 
         metrics.init();
+
+        oai_parser_opt = {
+            /* use_jinja             */ params_base.use_jinja,
+            /* prefill_assistant     */ params_base.prefill_assistant,
+            /* reasoning_format      */ params_base.reasoning_format,
+            /* common_chat_templates */ chat_templates.get(),
+            /* allow_image           */ mctx ? mtmd_support_vision(mctx) : false,
+            /* allow_audio           */ mctx ? mtmd_support_audio(mctx) : false,
+        };
     }
 
     server_slot * get_slot_by_id(int id) {
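As context for the aggregate initializer above, here is a minimal sketch of what the `oaicompat_parser_options` struct could look like. The field names and types are assumptions inferred from the `/* ... */` comments in the initializer; the real declaration lives in the server's utility header and may differ.

```cpp
// Sketch only: layout inferred from the initializer comments above, not the
// actual declaration in the server sources.
struct common_chat_templates;              // opaque; from common/chat.h in the real tree
enum   common_reasoning_format : int;      // from common/common.h; value set omitted here

struct oaicompat_parser_options {
    bool                     use_jinja;          // enable Jinja-based chat templating
    bool                     prefill_assistant;  // allow prefilling a partial assistant turn
    common_reasoning_format  reasoning_format;   // how reasoning/thinking output is surfaced
    common_chat_templates *  tmpls;              // non-owning pointer to the loaded templates
    bool                     allow_image;        // mtmd reports vision support
    bool                     allow_audio;        // mtmd reports audio support
};
```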
@@ -4092,7 +4102,10 @@ int main(int argc, char ** argv) {
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params_base.n_parallel },
             { "model_path",                  ctx_server.params_base.model.path },
-            { "modalities",                  json{{"vision", ctx_server.mctx != nullptr}} }, // TODO: add more in the future
+            { "modalities",                  json{
+                {"vision", ctx_server.oai_parser_opt.allow_image},
+                {"audio",  ctx_server.oai_parser_opt.allow_audio},
+            } },
             { "chat_template",               common_chat_templates_source(ctx_server.chat_templates.get()) },
             { "bos_token",                   common_token_to_piece(ctx_server.ctx, llama_vocab_bos(ctx_server.vocab), /* special= */ true)},
             { "eos_token",                   common_token_to_piece(ctx_server.ctx, llama_vocab_eos(ctx_server.vocab), /* special= */ true)},
@@ -4183,10 +4196,10 @@ int main(int argc, char ** argv) {
             for (auto & file : files) {
                 mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(file.data(), file.size()));
                 if (!bmp.ptr) {
-                    throw std::runtime_error("Failed to load image");
+                    throw std::runtime_error("Failed to load image or audio file");
                 }
                 // calculate bitmap hash (for KV caching)
-                std::string hash = fnv_hash(bmp.data(), bmp.nx()*bmp.ny()*3);
+                std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
                 bmp.set_id(hash.c_str());
                 bitmaps.entries.push_back(std::move(bmp));
             }
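The hash now covers `bmp.n_bytes()` rather than assuming three bytes per pixel, which keeps the cache key correct when the bitmap actually carries audio samples. For reference, a self-contained sketch of an FNV-1a 64-bit hash over a raw byte buffer; the server's own `fnv_hash` helper may differ in width or output formatting.

```cpp
#include <cstddef>
#include <cstdint>
#include <string>

// Sketch of an FNV-1a 64-bit hash over an arbitrary byte buffer, returned as a
// decimal string. Hashing n_bytes() instead of nx()*ny()*3 stays valid for
// non-RGB payloads such as audio buffers.
static std::string fnv_hash_sketch(const uint8_t * data, size_t len) {
    uint64_t hash = 0xcbf29ce484222325ULL;   // FNV offset basis
    for (size_t i = 0; i < len; ++i) {
        hash ^= data[i];
        hash *= 0x100000001b3ULL;            // FNV prime
    }
    return std::to_string(hash);
}
```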
@@ -4418,7 +4431,7 @@ int main(int argc, char ** argv) {
             OAICOMPAT_TYPE_NONE); // infill is not OAI compatible
     };
 
-    const auto handle_chat_completions = [&ctx_server, &params, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
+    const auto handle_chat_completions = [&ctx_server, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
         LOG_DBG("request: %s\n", req.body.c_str());
         if (ctx_server.params_base.embedding) {
             res_error(res, format_error_response("This server does not support completions. Start it without `--embeddings`", ERROR_TYPE_NOT_SUPPORTED));
@@ -4427,13 +4440,9 @@ int main(int argc, char ** argv) {
 
         auto body = json::parse(req.body);
         std::vector<raw_buffer> files;
-        json data = oaicompat_completion_params_parse(
+        json data = oaicompat_chat_params_parse(
             body,
-            params.use_jinja,
-            params.prefill_assistant,
-            params.reasoning_format,
-            ctx_server.chat_templates.get(),
-            ctx_server.mctx,
+            ctx_server.oai_parser_opt,
             files);
 
         handle_completions_impl(
@@ -4446,16 +4455,12 @@ int main(int argc, char ** argv) {
     };
 
     // same with handle_chat_completions, but without inference part
-    const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
+    const auto handle_apply_template = [&ctx_server, &res_ok](const httplib::Request & req, httplib::Response & res) {
         auto body = json::parse(req.body);
         std::vector<raw_buffer> files; // dummy, unused
-        json data = oaicompat_completion_params_parse(
+        json data = oaicompat_chat_params_parse(
             body,
-            params.use_jinja,
-            params.prefill_assistant,
-            params.reasoning_format,
-            ctx_server.chat_templates.get(),
-            ctx_server.mctx,
+            ctx_server.oai_parser_opt,
             files);
 
         res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
     };