Skip to content

Commit 3bead57

Browse files
committed
refactoring the process_token function
1 parent 0c65d40 commit 3bead57

File tree

1 file changed

+28
-23
lines changed

1 file changed

+28
-23
lines changed

tools/server/server.cpp

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2208,7 +2208,7 @@ struct server_context {
22082208
bool full_stop_reached = false;
22092209
bool partial_stop_reached = false;
22102210

2211-
// search start strings
2211+
// search the start strings
22122212
if (start_string_missing && !incomplete && slot.has_next_token) {
22132213
size_t max_start_string_size = slot.params.start_string_max_len;
22142214
size_t search_len = max_start_string_size + token_str.size();
@@ -2230,17 +2230,11 @@ struct server_context {
22302230
}
22312231
}
22322232

2233+
// search the stop strings
22332234
if (!incomplete) {
22342235
size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
22352236

22362237
const std::string str_test = slot.generated_text.substr(pos);
2237-
bool send_text = true;
2238-
2239-
// Handle the start strings
2240-
if (start_string_missing)
2241-
{
2242-
send_text = false;
2243-
}
22442238

22452239
// search stop word and delete it
22462240
size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), true);
@@ -2249,33 +2243,44 @@ struct server_context {
22492243
slot.generated_text.begin() + pos + stop_pos,
22502244
slot.generated_text.end());
22512245
pos = std::min(slot.n_sent_text, slot.generated_text.size());
2246+
full_stop_reached = true;
22522247
} else if (slot.has_next_token) {
22532248
stop_pos = slot.find_stopping_strings(str_test, token_str.size(), false);
2254-
send_text = send_text && stop_pos == std::string::npos;
2249+
partial_stop_reached = (stop_pos != std::string::npos);
22552250
}
2251+
}
22562252

2257-
// check if there is any token to predict
2258-
if (send_text) {
2259-
// do not send the stop word in the response
2260-
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
2261-
slot.n_sent_text += result.text_to_send.size();
2262-
// add the token to slot queue and cache
2263-
} else {
2264-
result.text_to_send = "";
2265-
}
2253+
if(full_stop_reached)
2254+
{
2255+
slot.stop = STOP_TYPE_WORD;
2256+
slot.has_next_token = false;
2257+
SLT_DBG(slot, "stopped by word, n_decoded = %d, n_predict = %d\n", slot.n_decoded, slot.params.n_predict);
2258+
}
22662259

2267-
slot.add_token(result);
2268-
if (slot.params.stream) {
2269-
send_partial_response(slot, result);
2270-
}
2260+
if(partial_stop_reached || start_string_missing)
2261+
{
2262+
result.text_to_send = "";
2263+
}
2264+
else
2265+
{
2266+
size_t valid_generated_len = validate_utf8(slot.generated_text);
2267+
size_t available_data = valid_generated_len - slot.n_sent_text;
2268+
result.text_to_send = slot.generated_text.substr(slot.n_sent_text, available_data);
2269+
slot.n_sent_text += result.text_to_send.size();
2270+
}
2271+
2272+
slot.add_token(result);
2273+
2274+
if (slot.params.stream && !result.text_to_send.empty()) {
2275+
send_partial_response(slot, result);
22712276
}
22722277

22732278
if (incomplete) {
22742279
slot.has_next_token = true;
22752280
}
22762281

22772282
// check the limits
2278-
if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params_base)) {
2283+
if (slot.has_next_token && token_budget_exhausted) {
22792284
slot.stop = STOP_TYPE_LIMIT;
22802285
slot.has_next_token = false;
22812286

0 commit comments

Comments
 (0)