@@ -1714,6 +1714,8 @@ struct server_queue {
1714
1714
};
1715
1715
1716
1716
struct server_response {
1717
+ bool running = true ;
1718
+
1717
1719
// for keeping track of all tasks waiting for the result
1718
1720
std::unordered_set<int > waiting_task_ids;
1719
1721
@@ -1768,6 +1770,10 @@ struct server_response {
1768
1770
while (true ) {
1769
1771
std::unique_lock<std::mutex> lock (mutex_results);
1770
1772
condition_results.wait (lock, [&]{
1773
+ if (!running) {
1774
+ SRV_DBG (" %s : queue result stop\n " , __func__);
1775
+ std::terminate (); // we cannot return here since the caller is HTTP code
1776
+ }
1771
1777
return !queue_results.empty ();
1772
1778
});
1773
1779
@@ -1798,6 +1804,10 @@ struct server_response {
1798
1804
}
1799
1805
1800
1806
std::cv_status cr_res = condition_results.wait_for (lock, std::chrono::seconds (timeout));
1807
+ if (!running) {
1808
+ SRV_DBG (" %s : queue result stop\n " , __func__);
1809
+ std::terminate (); // we cannot return here since the caller is HTTP code
1810
+ }
1801
1811
if (cr_res == std::cv_status::timeout) {
1802
1812
return nullptr ;
1803
1813
}
@@ -1827,6 +1837,12 @@ struct server_response {
1827
1837
}
1828
1838
}
1829
1839
}
1840
+
1841
+ // terminate the waiting loop
1842
+ void terminate () {
1843
+ running = false ;
1844
+ condition_results.notify_all ();
1845
+ }
1830
1846
};
1831
1847
1832
1848
struct server_context {
@@ -4503,9 +4519,10 @@ int main(int argc, char ** argv) {
4503
4519
svr->new_task_queue = [&params] { return new httplib::ThreadPool (params.n_threads_http ); };
4504
4520
4505
4521
// clean up function, to be called before exit
4506
- auto clean_up = [&svr]() {
4522
+ auto clean_up = [&svr, &ctx_server ]() {
4507
4523
SRV_INF (" %s: cleaning up before exit...\n " , __func__);
4508
4524
svr->stop ();
4525
+ ctx_server.queue_results .terminate ();
4509
4526
llama_backend_free ();
4510
4527
};
4511
4528
@@ -4546,7 +4563,7 @@ int main(int argc, char ** argv) {
4546
4563
4547
4564
if (!ctx_server.load_model (params)) {
4548
4565
clean_up ();
4549
- // t.join(); // FIXME: see below
4566
+ t.join ();
4550
4567
LOG_ERR (" %s: exiting due to model loading error\n " , __func__);
4551
4568
return 1 ;
4552
4569
}
@@ -4594,7 +4611,7 @@ int main(int argc, char ** argv) {
4594
4611
ctx_server.queue_tasks .start_loop ();
4595
4612
4596
4613
clean_up ();
4597
- // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
4614
+ t.join ();
4598
4615
4599
4616
return 0 ;
4600
4617
}
0 commit comments