Skip to content

Commit 5d4c0a1

Browse files
threadpool: move process priority setting into the apps (bench and cli)
This avoids changing the overall process priority on Windows for the apps that use ggml/llama.cpp directly.
1 parent 3bcc4de commit 5d4c0a1

File tree

6 files changed

+108
-64
lines changed

6 files changed

+108
-64
lines changed

common/common.cpp

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,57 @@ int32_t cpu_get_num_math() {
251251
return cpu_get_num_physical_cores();
252252
}
253253

254+
// Helper for setting process priority
255+
256+
#if defined(_WIN32)
257+
258+
bool set_process_priority(enum ggml_sched_priority prio) {
259+
if (prio == GGML_SCHED_PRIO_NORMAL) {
260+
return true;
261+
}
262+
263+
DWORD p = NORMAL_PRIORITY_CLASS;
264+
switch (prio) {
265+
case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
266+
case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
267+
case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
268+
case GGML_SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS; break;
269+
}
270+
271+
if (!SetPriorityClass(GetCurrentProcess(), p)) {
272+
fprintf(stderr, "warn: failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
273+
return false;
274+
}
275+
276+
return true;
277+
}
278+
279+
#else // MacOS and POSIX
280+
#include <sys/types.h>
281+
#include <sys/resource.h>
282+
283+
bool set_process_priority(enum ggml_sched_priority prio) {
284+
if (prio == GGML_SCHED_PRIO_NORMAL) {
285+
return true;
286+
}
287+
288+
int32_t p = 0;
289+
switch (prio) {
290+
case GGML_SCHED_PRIO_NORMAL: p = 0; break;
291+
case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
292+
case GGML_SCHED_PRIO_HIGH: p = -10; break;
293+
case GGML_SCHED_PRIO_REALTIME: p = -20; break;
294+
}
295+
296+
if (!setpriority(PRIO_PROCESS, 0, p)) {
297+
fprintf(stderr, "warn: failed to set process priority class %d : %s (%d)\n", prio, strerror(errno), errno);
298+
return false;
299+
}
300+
return true;
301+
}
302+
303+
#endif
304+
254305
//
255306
// CLI argument parsing
256307
//
@@ -508,7 +559,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
508559
}
509560
if (arg == "--prio") {
510561
CHECK_ARG
511-
params.cpuparams.priority = std::stoul(argv[i]);
562+
params.cpuparams.priority = (enum ggml_sched_priority) std::stoul(argv[i]);
512563
return true;
513564
}
514565
if (arg == "--cpu-strict") {
@@ -545,7 +596,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
545596
}
546597
if (arg == "--prio-batch") {
547598
CHECK_ARG
548-
params.cpuparams_batch.priority = std::stoul(argv[i]);
599+
params.cpuparams_batch.priority = (enum ggml_sched_priority) std::stoul(argv[i]);
549600
return true;
550601
}
551602
if (arg == "--cpu-strict-batch") {
@@ -581,7 +632,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
581632
}
582633
if (arg == "--prio-draft") {
583634
CHECK_ARG
584-
params.draft_cpuparams.priority = std::stoul(argv[i]);
635+
params.draft_cpuparams.priority = (enum ggml_sched_priority) std::stoul(argv[i]);
585636
return true;
586637
}
587638
if (arg == "--cpu-strict-draft") {
@@ -610,7 +661,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
610661
}
611662
if (arg == "--prio-batch-draft") {
612663
CHECK_ARG
613-
params.draft_cpuparams_batch.priority = std::stoul(argv[i]);
664+
params.draft_cpuparams_batch.priority = (enum ggml_sched_priority) std::stoul(argv[i]);
614665
return true;
615666
}
616667
if (arg == "--cpu-strict-batch-draft") {

common/common.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ struct cpu_params {
7171
int n_threads = -1;
7272
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
7373
bool mask_valid = false; // Default: any CPU
74-
int32_t priority = 0; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
74+
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
7575
bool strict_cpu = false; // Use strict CPU placement
7676
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
7777
};
@@ -290,6 +290,7 @@ std::string gpt_params_get_system_info(const gpt_params & params);
290290
bool parse_cpu_range(const std::string& range, bool(&boolmask)[GGML_MAX_N_THREADS]);
291291
bool parse_cpu_mask(const std::string& mask, bool(&boolmask)[GGML_MAX_N_THREADS]);
292292
void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model = nullptr);
293+
bool set_process_priority(enum ggml_sched_priority prio);
293294

294295
//
295296
// String utils

examples/llama-bench/llama-bench.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ struct cmd_params {
240240
std::vector<bool> embeddings;
241241
ggml_numa_strategy numa;
242242
int reps;
243-
int prio;
243+
ggml_sched_priority prio;
244244
int delay;
245245
bool verbose;
246246
output_formats output_format;
@@ -271,7 +271,7 @@ static const cmd_params cmd_params_defaults = {
271271
/* embeddings */ {false},
272272
/* numa */ GGML_NUMA_STRATEGY_DISABLED,
273273
/* reps */ 5,
274-
/* prio */ 0,
274+
/* prio */ GGML_SCHED_PRIO_NORMAL,
275275
/* delay */ 0,
276276
/* verbose */ false,
277277
/* output_format */ MARKDOWN,
@@ -585,7 +585,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
585585
invalid_param = true;
586586
break;
587587
}
588-
params.prio = std::stoi(argv[i]);
588+
params.prio = (enum ggml_sched_priority) std::stoi(argv[i]);
589589
} else if (arg == "--delay") {
590590
if (++i >= argc) {
591591
invalid_param = true;
@@ -1470,6 +1470,8 @@ int main(int argc, char ** argv) {
14701470
llama_backend_init();
14711471
llama_numa_init(params.numa);
14721472

1473+
set_process_priority(params.prio);
1474+
14731475
// initialize printer
14741476
std::unique_ptr<printer> p = create_printer(params.output_format);
14751477
std::unique_ptr<printer> p_err = create_printer(params.output_format_stderr);
@@ -1525,9 +1527,9 @@ int main(int argc, char ** argv) {
15251527
LOG_TEE("%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
15261528
exit(1);
15271529
}
1528-
tpp.strict_cpu = t.cpu_strict;
1529-
tpp.poll = t.poll;
1530-
tpp.prio = params.prio;
1530+
tpp.strict_cpu = t.cpu_strict;
1531+
tpp.poll = t.poll;
1532+
tpp.prio = params.prio;
15311533

15321534
struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
15331535
if (!threadpool) {

examples/main/main.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,8 @@ int main(int argc, char ** argv) {
230230
struct ggml_threadpool_params tpp =
231231
ggml_threadpool_params_from_cpu_params(params.cpuparams);
232232

233+
set_process_priority(params.cpuparams.priority);
234+
233235
struct ggml_compute_threadpool * threadpool_batch = NULL;
234236
if (!ggml_threadpool_params_match(&tpp, &tpp_batch)) {
235237
threadpool_batch = ggml_create_threadpool(&tpp_batch);

ggml/include/ggml.h

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -626,15 +626,23 @@ extern "C" {
626626
// If it returns true, the computation is aborted
627627
typedef bool (*ggml_abort_callback)(void * data);
628628

629+
// Scheduling priorities
630+
enum ggml_sched_priority {
631+
GGML_SCHED_PRIO_NORMAL, // 0 - normal: the inherited policy/priority is kept as-is
632+
GGML_SCHED_PRIO_MEDIUM, // 1 - medium
633+
GGML_SCHED_PRIO_HIGH, // 2 - high
634+
GGML_SCHED_PRIO_REALTIME // 3 - realtime; keep the enumerator order, numeric CLI values are cast directly to this enum
635+
};
636+
629637
// Threadpool params
630638
// Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
631639
struct ggml_threadpool_params {
632-
bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
633-
int n_threads; // number of threads
634-
int32_t prio; // thread priority
635-
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
636-
bool strict_cpu; // strict cpu placement
637-
bool paused; // start in paused state
640+
bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
641+
int n_threads; // number of threads
642+
enum ggml_sched_priority prio; // thread priority
643+
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
644+
bool strict_cpu; // strict cpu placement
645+
bool paused; // start in paused state
638646
};
639647

640648
struct ggml_compute_threadpool; // forward declaration, see ggml.c

ggml/src/ggml.c

Lines changed: 27 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -18655,18 +18655,11 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
1865518655

1865618656
static thread_ret_t ggml_graph_compute_secondary_thread(void* data);
1865718657

18658-
enum {
18659-
SCHED_PRIO_NORMAL,
18660-
SCHED_PRIO_MEDIUM,
18661-
SCHED_PRIO_HIGH,
18662-
SCHED_PRIO_REALTIME
18663-
};
18664-
1866518658
#if defined(_WIN32)
1866618659
#include "windows.h"
1866718660

1866818661
// TODO: support > 64 CPUs
18669-
static bool ggml_thread_apply_affinity(bool * mask) {
18662+
bool ggml_thread_apply_affinity(bool * mask) {
1867018663
HANDLE h = GetCurrentThread();
1867118664
uint64_t bitmask = 0ULL;
1867218665

@@ -18700,33 +18693,20 @@ static bool ggml_thread_apply_affinity(bool * mask) {
1870018693
return m != 0;
1870118694
}
1870218695

18703-
static bool ggml_thread_apply_thread_priority(int32_t prio) {
18704-
DWORD p = NORMAL_PRIORITY_CLASS;
18705-
18706-
if (prio == SCHED_PRIO_NORMAL) {
18707-
// Keep inherited policy/priority
18708-
return true;
18709-
}
18710-
18711-
// On Windows we have to update Process Priority Class in order to set Thread priority.
18712-
18696+
static bool ggml_thread_apply_priority(int32_t prio) {
18697+
// Note that on Windows the Process Priority Class must be updated in order to set Thread priority.
18698+
// This is up to the applications.
18699+
DWORD p = THREAD_PRIORITY_NORMAL;
1871318700
switch (prio) {
18714-
case SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
18715-
case SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
18716-
case SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
18717-
case SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS; break;
18718-
}
18719-
18720-
if (!SetPriorityClass(GetCurrentProcess(), p)) {
18721-
fprintf(stderr, "warn: failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
18722-
return false;
18701+
case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
18702+
case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
18703+
case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
18704+
case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
1872318705
}
1872418706

18725-
switch (prio) {
18726-
case SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
18727-
case SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
18728-
case SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
18729-
case SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
18707+
if (prio == GGML_SCHED_PRIO_NORMAL) {
18708+
// Keep inherited policy/priority
18709+
return true;
1873018710
}
1873118711

1873218712
if (!SetThreadPriority(GetCurrentThread(), p)) {
@@ -18747,17 +18727,17 @@ static bool ggml_thread_apply_affinity(const bool * mask) {
1874718727
return true;
1874818728
}
1874918729

18750-
static bool ggml_thread_apply_thread_priority(int32_t prio) {
18730+
static bool ggml_thread_apply_priority(int32_t prio) {
1875118731
struct sched_param p;
1875218732
int32_t policy = SCHED_OTHER;
1875318733
switch (prio) {
18754-
case SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
18755-
case SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
18756-
case SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
18757-
case SCHED_PRIO_REALTIME: policy = SCHED_FIFO; p.sched_priority = 90; break;
18734+
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
18735+
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
18736+
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
18737+
case GGML_SCHED_PRIO_REALTIME: policy = SCHED_FIFO; p.sched_priority = 90; break;
1875818738
}
1875918739

18760-
if (prio == SCHED_PRIO_NORMAL) {
18740+
if (prio == GGML_SCHED_PRIO_NORMAL) {
1876118741
// Keep inherited policy/priority
1876218742
return true;
1876318743
}
@@ -18802,17 +18782,17 @@ static bool ggml_thread_apply_affinity(const bool * mask) {
1880218782
return true;
1880318783
}
1880418784

18805-
static bool ggml_thread_apply_thread_priority(int32_t prio) {
18785+
static bool ggml_thread_apply_priority(int32_t prio) {
1880618786
struct sched_param p;
1880718787
int32_t policy = SCHED_OTHER;
1880818788
switch (prio) {
18809-
case SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
18810-
case SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
18811-
case SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
18812-
case SCHED_PRIO_REALTIME: policy = SCHED_FIFO; p.sched_priority = 90; break;
18789+
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
18790+
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
18791+
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
18792+
case GGML_SCHED_PRIO_REALTIME: policy = SCHED_FIFO; p.sched_priority = 90; break;
1881318793
}
1881418794

18815-
if (prio == SCHED_PRIO_NORMAL) {
18795+
if (prio == GGML_SCHED_PRIO_NORMAL) {
1881618796
// Keep inherited policy/priority
1881718797
return true;
1881818798
}
@@ -19190,7 +19170,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
1919019170
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
1919119171
struct ggml_compute_threadpool * threadpool = state->threadpool;
1919219172

19193-
ggml_thread_apply_thread_priority(threadpool->prio);
19173+
ggml_thread_apply_priority(threadpool->prio);
1919419174
if (ggml_thread_cpumask_is_valid(state->cpumask)) {
1919519175
ggml_thread_apply_affinity(state->cpumask);
1919619176
}
@@ -19238,7 +19218,7 @@ static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpo
1923819218

1923919219
if (threadpool->pause) {
1924019220
// Update main thread prio and affinity to match the threadpool settings
19241-
ggml_thread_apply_thread_priority(threadpool->prio);
19221+
ggml_thread_apply_priority(threadpool->prio);
1924219222
if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
1924319223
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
1924419224
}
@@ -19333,7 +19313,7 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
1933319313

1933419314
if (!threadpool->pause) {
1933519315
// Update main thread prio and affinity at the start, otherwise we'll do it in resume
19336-
ggml_thread_apply_thread_priority(threadpool->prio);
19316+
ggml_thread_apply_priority(threadpool->prio);
1933719317
if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
1933819318
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
1933919319
}

0 commit comments

Comments
 (0)