@@ -1552,11 +1552,11 @@ struct server_queue {
1552
1552
std::condition_variable condition_tasks;
1553
1553
1554
1554
// callback functions
1555
- std::function<void (server_task)> callback_new_task;
1556
- std::function<void (void )> callback_update_slots;
1555
+ std::function<void (server_task& )> callback_new_task;
1556
+ std::function<void (void )> callback_update_slots;
1557
1557
1558
1558
// Add a new task to the end of the queue
1559
- int post (server_task task, bool front = false ) {
1559
+ int post (server_task & task, bool front = false ) {
1560
1560
std::unique_lock<std::mutex> lock (mutex_tasks);
1561
1561
GGML_ASSERT (task.id != -1 );
1562
1562
// if this is a cancel task, make sure to clean up pending tasks
@@ -1596,7 +1596,7 @@ struct server_queue {
1596
1596
}
1597
1597
1598
1598
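// Add a new task, but defer it until one slot is available.
// NOTE: with the by-reference signature the task is moved into queue_tasks_deferred,
// so the caller's object is left in a moved-from state after this call.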
1599
- void defer (server_task task) {
1599
+ void defer (server_task & task) {
1600
1600
std::unique_lock<std::mutex> lock (mutex_tasks);
1601
1601
QUE_DBG (" defer task, id = %d\n " , task.id );
1602
1602
queue_tasks_deferred.push_back (std::move (task));
@@ -1611,7 +1611,7 @@ struct server_queue {
1611
1611
}
1612
1612
1613
1613
// Register the function used to process a new task.
// The callback is stored in callback_new_task (replacing whatever was stored before).
// With this change the callback receives the task by non-const reference
// (server_task &) instead of by value, so no copy of the task is made per call.
1614
- void on_new_task (std::function<void (server_task)> callback) {
1614
+ void on_new_task (std::function<void (server_task& )> callback) {
1615
1615
callback_new_task = std::move (callback);
1616
1616
}
1617
1617
@@ -1660,12 +1660,12 @@ struct server_queue {
1660
1660
lock.unlock ();
1661
1661
break ;
1662
1662
}
1663
- server_task task = queue_tasks.front ();
1663
+ server_task task = std::move ( queue_tasks.front () );
1664
1664
queue_tasks.pop_front ();
1665
1665
lock.unlock ();
1666
1666
1667
1667
QUE_DBG (" processing task, id = %d\n " , task.id );
1668
- callback_new_task (std::move ( task) );
1668
+ callback_new_task (task);
1669
1669
}
1670
1670
1671
1671
// all tasks in the current loop are processed, slots data is now ready
@@ -2004,7 +2004,7 @@ struct server_context {
2004
2004
2005
2005
slot.reset ();
2006
2006
2007
- slots.push_back (slot);
2007
+ slots.push_back (std::move ( slot) );
2008
2008
}
2009
2009
2010
2010
default_generation_settings_for_props = slots[0 ].to_json ();
@@ -2547,7 +2547,7 @@ struct server_context {
2547
2547
server_task task (SERVER_TASK_TYPE_CANCEL);
2548
2548
task.id_target = id_task;
2549
2549
queue_results.remove_waiting_task_id (id_task);
2550
- cancel_tasks.push_back (task);
2550
+ cancel_tasks.push_back (std::move ( task) );
2551
2551
}
2552
2552
// push to beginning of the queue, so it has highest priority
2553
2553
queue_tasks.post (cancel_tasks, true );
@@ -2637,7 +2637,7 @@ struct server_context {
2637
2637
// Functions to process the task
2638
2638
//
2639
2639
2640
- void process_single_task (server_task task) {
2640
+ void process_single_task (server_task & task) {
2641
2641
switch (task.type ) {
2642
2642
case SERVER_TASK_TYPE_COMPLETION:
2643
2643
case SERVER_TASK_TYPE_INFILL:
@@ -3965,7 +3965,7 @@ int main(int argc, char ** argv) {
3965
3965
task.params .oaicompat_cmpl_id = completion_id;
3966
3966
// oaicompat_model is already populated by params_from_json_cmpl
3967
3967
3968
- tasks.push_back (task);
3968
+ tasks.push_back (std::move ( task) );
3969
3969
}
3970
3970
} catch (const std::exception & e) {
3971
3971
res_error (res, format_error_response (e.what (), ERROR_TYPE_INVALID_REQUEST));
@@ -4280,7 +4280,7 @@ int main(int argc, char ** argv) {
4280
4280
// OAI-compat
4281
4281
task.params .oaicompat = oaicompat;
4282
4282
4283
- tasks.push_back (task);
4283
+ tasks.push_back (std::move ( task) );
4284
4284
}
4285
4285
4286
4286
ctx_server.queue_results .add_waiting_tasks (tasks);
@@ -4376,7 +4376,7 @@ int main(int argc, char ** argv) {
4376
4376
task.id = ctx_server.queue_tasks .get_new_id ();
4377
4377
task.index = i;
4378
4378
task.prompt_tokens = format_rerank (ctx_server.vocab , tokenized_query, tokenized_docs[i]);
4379
- tasks.push_back (task);
4379
+ tasks.push_back (std::move ( task) );
4380
4380
}
4381
4381
4382
4382
ctx_server.queue_results .add_waiting_tasks (tasks);
@@ -4582,7 +4582,7 @@ int main(int argc, char ** argv) {
4582
4582
common_chat_templates_source (ctx_server.chat_templates .get ()),
4583
4583
common_chat_format_example (ctx_server.chat_templates .get (), ctx_server.params_base .use_jinja ).c_str ());
4584
4584
4585
- ctx_server.queue_tasks .on_new_task ([&ctx_server](const server_task & task) {
4585
+ ctx_server.queue_tasks .on_new_task ([&ctx_server](server_task & task) {
4586
4586
ctx_server.process_single_task (task);
4587
4587
});
4588
4588
0 commit comments