@@ -1705,6 +1705,8 @@ struct server_queue {
1705
1705
};
1706
1706
1707
1707
struct server_response {
1708
+ bool running = true ;
1709
+
1708
1710
// for keeping track of all tasks waiting for the result
1709
1711
std::unordered_set<int > waiting_task_ids;
1710
1712
@@ -1759,6 +1761,10 @@ struct server_response {
1759
1761
while (true ) {
1760
1762
std::unique_lock<std::mutex> lock (mutex_results);
1761
1763
condition_results.wait (lock, [&]{
1764
+ if (!running) {
1765
+ SRV_DBG (" %s : queue result stop\n " , __func__);
1766
+ std::terminate (); // we cannot return here since the caller is HTTP code
1767
+ }
1762
1768
return !queue_results.empty ();
1763
1769
});
1764
1770
@@ -1789,6 +1795,10 @@ struct server_response {
1789
1795
}
1790
1796
1791
1797
std::cv_status cr_res = condition_results.wait_for (lock, std::chrono::seconds (timeout));
1798
+ if (!running) {
1799
+ SRV_DBG (" %s : queue result stop\n " , __func__);
1800
+ std::terminate (); // we cannot return here since the caller is HTTP code
1801
+ }
1792
1802
if (cr_res == std::cv_status::timeout) {
1793
1803
return nullptr ;
1794
1804
}
@@ -1818,6 +1828,12 @@ struct server_response {
1818
1828
}
1819
1829
}
1820
1830
}
1831
+
1832
+ // terminate the waiting loop
1833
+ void terminate () {
1834
+ running = false ;
1835
+ condition_results.notify_all ();
1836
+ }
1821
1837
};
1822
1838
1823
1839
struct server_context {
@@ -4491,9 +4507,10 @@ int main(int argc, char ** argv) {
4491
4507
svr->new_task_queue = [&params] { return new httplib::ThreadPool (params.n_threads_http ); };
4492
4508
4493
4509
// clean up function, to be called before exit
4494
- auto clean_up = [&svr]() {
4510
+ auto clean_up = [&svr, &ctx_server ]() {
4495
4511
SRV_INF (" %s: cleaning up before exit...\n " , __func__);
4496
4512
svr->stop ();
4513
+ ctx_server.queue_results .terminate ();
4497
4514
llama_backend_free ();
4498
4515
};
4499
4516
@@ -4534,7 +4551,7 @@ int main(int argc, char ** argv) {
4534
4551
4535
4552
if (!ctx_server.load_model (params)) {
4536
4553
clean_up ();
4537
- // t.join(); // FIXME: see below
4554
+ t.join ();
4538
4555
LOG_ERR (" %s: exiting due to model loading error\n " , __func__);
4539
4556
return 1 ;
4540
4557
}
@@ -4582,7 +4599,7 @@ int main(int argc, char ** argv) {
4582
4599
ctx_server.queue_tasks .start_loop ();
4583
4600
4584
4601
clean_up ();
4585
- // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
4602
+ t.join ();
4586
4603
4587
4604
return 0 ;
4588
4605
}
0 commit comments