@@ -3707,6 +3707,7 @@ int main(int argc, char ** argv) {
             "/health",
             "/models",
             "/v1/models",
+            "/api/tags"
         };
 
         // If API key is not set, skip validation
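The hunk above whitelists `/api/tags` alongside the existing public endpoints, so Ollama clients can probe it without an API key. Below is a minimal sketch of such a probe using cpp-httplib (the HTTP library the server itself is built on); the client code and the `localhost:8080` address are illustrative assumptions, not part of the patch:

```cpp
// Illustrative client, not part of the patch: probe the new public
// endpoint without an Authorization header. Assumes cpp-httplib and a
// llama-server listening on localhost:8080.
#include <cstdio>
#include "httplib.h"

int main() {
    httplib::Client cli("http://localhost:8080");
    auto res = cli.Get("/api/tags"); // no API key attached
    if (res && res->status == 200) {
        std::printf("%s\n", res->body.c_str());
    }
    return 0;
}
```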
@@ -3745,7 +3746,7 @@ int main(int argc, char ** argv) {
         if (req.path == "/" || tmp.back() == "html") {
             res.set_content(reinterpret_cast<const char *>(loading_html), loading_html_len, "text/html; charset=utf-8");
             res.status = 503;
-        } else if (req.path == "/models" || req.path == "/v1/models") {
+        } else if (req.path == "/models" || req.path == "/v1/models" || req.path == "/api/tags") {
            // allow the models endpoint to be accessed during loading
            return true;
        } else {
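With this change, `/api/tags` joins `/models` and `/v1/models` as the endpoints that stay reachable while the model is still loading; every other path gets the 503 loading page. The same check, restated as a hypothetical standalone predicate for clarity (not part of the patch):

```cpp
#include <string>

// Hypothetical restatement of the loading-state check above: only the
// model-listing endpoints remain accessible before the model finishes
// loading; everything else is answered with the 503 loading page.
static bool is_accessible_during_load(const std::string & path) {
    return path == "/models" || path == "/v1/models" || path == "/api/tags";
}
```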
@@ -4083,6 +4084,19 @@ int main(int argc, char ** argv) {
                     { "llama.context_length", ctx_server.slots.back().n_ctx, },
                 }
             },
+            {"modelfile", ""},
+            {"parameters", ""},
+            {"template", common_chat_templates_source(ctx_server.chat_templates.get())},
+            {"details", {
+                {"parent_model", ""},
+                {"format", "gguf"},
+                {"family", ""},
+                {"families", {""}},
+                {"parameter_size", ""},
+                {"quantization_level", ""}
+            }},
+            {"model_info", ""},
+            {"capabilities", {"completion"}}
         };
 
         res_ok(res, data);
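This hunk pads the `/api/show` response with the fields Ollama's schema expects; apart from `template` (the actual chat template source) and `details.format`, they are empty placeholders, since llama.cpp does not track Modelfile-style metadata. A sketch of how a client might read the new fields, assuming nlohmann::json and a response shaped like the object built above:

```cpp
// Sketch of a consumer of the extended /api/show response. The literal
// below is a stand-in for an actual HTTP response body; only "template"
// and "details.format" carry real data in the patch.
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    json data = json::parse(R"({
        "template": "<chat template source>",
        "details": { "format": "gguf", "family": "" },
        "capabilities": ["completion"]
    })");
    std::cout << data["details"]["format"].get<std::string>() << "\n"; // gguf
    std::cout << data["template"].get<std::string>()          << "\n";
    return 0;
}
```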
@@ -4408,6 +4422,28 @@ int main(int argc, char ** argv) {
         }
 
         json models = {
+            {"models", {
+                {
+                    {"name",  params.model_alias.empty() ? params.model.path : params.model_alias},
+                    {"model", params.model_alias.empty() ? params.model.path : params.model_alias},
+                    {"modified_at", ""},
+                    {"size", ""},
+                    {"digest", ""}, // dummy value, llama.cpp does not support managing model file's hash
+                    {"type", "model"},
+                    {"description", ""},
+                    {"tags", {""}},
+                    {"capabilities", {"completion"}},
+                    {"parameters", ""},
+                    {"details", {
+                        {"parent_model", ""},
+                        {"format", "gguf"},
+                        {"family", ""},
+                        {"families", {""}},
+                        {"parameter_size", ""},
+                        {"quantization_level", ""}
+                    }}
+                }
+            }},
             {"object", "list"},
             {"data", {
                 {
@@ -4417,7 +4453,7 @@ int main(int argc, char ** argv) {
                     {"owned_by", "llamacpp"},
                     {"meta",     model_meta},
                 },
-           }}
+            }}
         };
 
         res_ok(res, models);
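After these two hunks, `handle_models` serves a single payload that satisfies both client styles: an Ollama-style `models` array next to the OpenAI-style `object`/`data` pair. A sketch of consuming both shapes from one response; `my-model.gguf` is a made-up placeholder, and the structure mirrors the object built above:

```cpp
// Sketch: one payload, two client styles. "my-model.gguf" is a made-up
// placeholder; the structure mirrors the object built in the hunks above.
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    json models = json::parse(R"({
        "models": [ { "name": "my-model.gguf", "model": "my-model.gguf",
                      "details": { "format": "gguf" } } ],
        "object": "list",
        "data":   [ { "id": "my-model.gguf", "owned_by": "llamacpp" } ]
    })");
    // An Ollama-style reader (/api/tags) walks the "models" array ...
    std::cout << models["models"][0]["name"].get<std::string>() << "\n";
    // ... while an OpenAI-style reader (/v1/models) walks "data".
    std::cout << models["data"][0]["id"].get<std::string>() << "\n";
    return 0;
}
```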
@@ -4745,11 +4781,13 @@ int main(int argc, char ** argv) {
     svr->Post("/api/show",            handle_api_show);
     svr->Get ("/models",              handle_models); // public endpoint (no API key check)
     svr->Get ("/v1/models",           handle_models); // public endpoint (no API key check)
+    svr->Get ("/api/tags",            handle_models); // ollama specific endpoint. public endpoint (no API key check)
     svr->Post("/completion",          handle_completions); // legacy
     svr->Post("/completions",         handle_completions);
     svr->Post("/v1/completions",      handle_completions_oai);
     svr->Post("/chat/completions",    handle_chat_completions);
     svr->Post("/v1/chat/completions", handle_chat_completions);
+    svr->Post("/api/chat",            handle_chat_completions); // ollama specific endpoint
     svr->Post("/infill",              handle_infill);
     svr->Post("/embedding",           handle_embeddings); // legacy
     svr->Post("/embeddings",          handle_embeddings);