-
Notifications
You must be signed in to change notification settings - Fork 12k
server : fix thread.join() on exit #12831
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
TBh, I like this approach over the other one.
Thanks for fixing it.
it's not working @ngxson @qnixsynapse download the latest release b5083 $ in terminal
$ in chrome <----
$ in terminal
|
I think best here is to allow the server to throw an exception if a task is running and it receives a termination signal: diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 1bf1ee87..c4a01547 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1,3 +1,4 @@
+#include <stdexcept>
#include "utils.hpp"
#include "arg.h"
@@ -1763,7 +1764,7 @@ struct server_response {
condition_results.wait(lock, [&]{
if (!running) {
SRV_DBG("%s : queue result stop\n", __func__);
- std::terminate(); // we cannot return here since the caller is HTTP code
+ throw std::runtime_error("Server task terminated"); // we cannot return here since the caller is HTTP code
}
return !queue_results.empty();
});
@@ -1797,7 +1798,7 @@ struct server_response {
std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
if (!running) {
SRV_DBG("%s : queue result stop\n", __func__);
- std::terminate(); // we cannot return here since the caller is HTTP code
+ throw std::runtime_error("Server task terminated"); // we cannot return here since the caller is HTTP code
}
if (cr_res == std::cv_status::timeout) {
return nullptr; |
the
so may by just
|
@qnixsynapse Yes, throwing an exception makes more sense, as the HTTP will catch it @tyronecai That will work but not optimal, we will still need to wait until |
close all client socks in httplib.h Server.stop is good solution ? diff --git a/examples/server/httplib.h b/examples/server/httplib.h
index 0f981dc8..5255ce6e 100644
--- a/examples/server/httplib.h
+++ b/examples/server/httplib.h
@@ -1117,9 +1117,22 @@ private:
virtual bool process_and_close_socket(socket_t sock);
+ inline void record_client_sock(socket_t sock) {
+ std::lock_guard<std::mutex> guard(cilent_sock_set_mutex_);
+ client_sock_set_.insert(sock);
+ }
+ inline void remove_client_sock(socket_t sock) {
+ std::lock_guard<std::mutex> guard(cilent_sock_set_mutex_);
+ client_sock_set_.erase(sock);
+ }
+
std::atomic<bool> is_running_{false};
std::atomic<bool> is_decommissioned{false};
+ // record all client socks
+ std::mutex cilent_sock_set_mutex_;
+ std::unordered_set<socket_t> client_sock_set_;
+
struct MountPointEntry {
std::string mount_point;
std::string base_dir;
@@ -6564,6 +6577,13 @@ inline void Server::stop() {
std::atomic<socket_t> sock(svr_sock_.exchange(INVALID_SOCKET));
detail::shutdown_socket(sock);
detail::close_socket(sock);
+
+ std::lock_guard<std::mutex> guard(cilent_sock_set_mutex_);
+ for (auto it = client_sock_set_.begin(); it != client_sock_set_.end();) {
+ detail::shutdown_socket(*it);
+ detail::close_socket(*it);
+ it = client_sock_set_.erase(it);
+ }
}
is_decommissioned = false;
}
@@ -7014,8 +7034,10 @@ inline bool Server::listen_internal() {
detail::set_socket_opt_time(sock, SOL_SOCKET, SO_SNDTIMEO,
write_timeout_sec_, write_timeout_usec_);
+ record_client_sock(sock);
if (!task_queue->enqueue(
[this, sock]() { process_and_close_socket(sock); })) {
+ remove_client_sock(sock);
detail::shutdown_socket(sock);
detail::close_socket(sock);
}
@@ -7436,6 +7458,7 @@ inline bool Server::process_and_close_socket(socket_t sock) {
nullptr);
});
+ remove_client_sock(sock);
detail::shutdown_socket(sock);
detail::close_socket(sock);
return ret;
@@ -9449,6 +9472,7 @@ inline bool SSLServer::process_and_close_socket(socket_t sock) {
detail::ssl_delete(ctx_mutex_, ssl, sock, shutdown_gracefully);
}
+ remove_client_sock(sock);
detail::shutdown_socket(sock);
detail::close_socket(sock);
return ret; |
I don't think @yhirose gonna like it, it's best to do it on llama.cpp side TL;DR for you @yhirose , we're finding a way to stop HTTP thread when server stop, but it's quite tricky in case the thread is busy waiting for something. I think throwing an exception would be the best, right? Because it will be catched by exception handler anyway |
thanks for your patience diff --git a/tmp/a.cc b/../examples/server/server.cpp
index 1bf1ee87..9fe7d896 100644
--- a/tmp/a.cc
+++ b/../examples/server/server.cpp
@@ -1762,12 +1762,15 @@ struct server_response {
std::unique_lock<std::mutex> lock(mutex_results);
condition_results.wait(lock, [&]{
if (!running) {
- SRV_DBG("%s : queue result stop\n", __func__);
- std::terminate(); // we cannot return here since the caller is HTTP code
+ SRV_DBG("%s : queue result stop\n", __func__);
+ return true;
}
return !queue_results.empty();
});
-
+ if (!running) {
+ SRV_DBG("%s : queue result stop1\n", __func__);
+ return std::make_unique<server_task_result_error>();
+ }
for (size_t i = 0; i < queue_results.size(); i++) {
if (id_tasks.find(queue_results[i]->id) != id_tasks.end()) {
server_task_result_ptr res = std::move(queue_results[i]);
@@ -1796,8 +1799,8 @@ struct server_response {
std::cv_status cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout));
if (!running) {
- SRV_DBG("%s : queue result stop\n", __func__);
- std::terminate(); // we cannot return here since the caller is HTTP code
+ SRV_DBG("%s : queue result stop\n", __func__);
+ return std::make_unique<server_task_result_error>();
}
if (cr_res == std::cv_status::timeout) {
return nullptr;
|
* master: (123 commits) cuda : add f32 to bf16 copy op (ggml-org#12806) llava: improve clip_ctx destructor to not memleak load_image_size (ggml-org#12834) llama : fix FA when KV cache is not used (i.e. embeddings) (ggml-org#12825) server : fix thread.join() on exit (ggml-org#12831) llava: add more helper functions to check projector types in clip context (ggml-org#12824) arg : Including limits file on AIX (ggml-org#12822) server : webui : Improve Chat Input with Auto-Sizing Textarea (ggml-org#12785) Revert "sycl:remove redundant memcopy in function ggml_backend_sycl_buffer_set_tensor" (ggml-org#12812) gguf-py : support lazy tensor splitting (ggml-org#12809) llama : Support llama 4 text-only (ggml-org#12791) opencl: better identify Adreno GPU (ggml-org#12760) hellaswag: display estimated score confidence interval (ggml-org#12797) cuda : fix HIP and MUSA BF16 (#0) sync : ggml ggml : simplify Arm fp16 CPU logic (ggml/1177) CUDA: don't convert BF16 weights to FP32 (ggml/1174) cpu: move all the operators into a separate c++ file (except mul_mat) (ggml/1167) sycl: remove redundant memcopy in function ggml_backend_sycl_buffer_set_tensor (ggml-org#12734) ci : no curl on ggml-ci (ggml-org#12796) cmake : enable curl by default (ggml-org#12761) ... # Conflicts: # common/arg.cpp # common/common.cpp # common/common.h
Supersede #12813
Resolve #12180
@qnixsynapse can you try if this fixes the issue?