Skip to content

Commit a99c104

Browse files
threadpool: make polling the default to match OpenMP behavior
All polling-related command-line args (and --cpu-strict) now take an explicit <0|1> value, so polling can be set to 0 (false).
1 parent 56fe3ee commit a99c104

File tree

3 files changed

+32
-16
lines changed

3 files changed

+32
-16
lines changed

common/common.cpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -435,11 +435,13 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
435435
return true;
436436
}
437437
if (arg == "--cpu-strict") {
438-
params.cpuparams.strict_cpu = true;
438+
CHECK_ARG
439+
params.cpuparams.strict_cpu = std::stoul(argv[i]);
439440
return true;
440441
}
441442
if (arg == "--poll") {
442-
params.cpuparams.poll = true;
443+
CHECK_ARG
444+
params.cpuparams.poll = std::stoul(argv[i]);
443445
return true;
444446
}
445447
if (arg == "-tb" || arg == "--threads-batch") {
@@ -474,7 +476,8 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
474476
return true;
475477
}
476478
if (arg == "--poll-batch") {
477-
params.cpuparams_batch.poll = true;
479+
CHECK_ARG
480+
params.cpuparams_batch.poll = std::stoul(argv[i]);
478481
return true;
479482
}
480483
if (arg == "-td" || arg == "--threads-draft") {
@@ -509,7 +512,8 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
509512
return true;
510513
}
511514
if (arg == "--poll-draft") {
512-
params.draft_cpuparams.poll = true;
515+
CHECK_ARG
516+
params.draft_cpuparams.poll = std::stoul(argv[i]);
513517
return true;
514518
}
515519
if (arg == "-tbd" || arg == "--threads-batch-draft") {
@@ -537,7 +541,8 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
537541
return true;
538542
}
539543
if (arg == "--poll-batch-draft") {
540-
params.draft_cpuparams_batch.poll = true;
544+
CHECK_ARG
545+
params.draft_cpuparams_batch.poll = std::stoul(argv[i]);
541546
return true;
542547
}
543548
if (arg == "-p" || arg == "--prompt") {
@@ -1627,34 +1632,37 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
16271632
options.push_back({ "*", "-t, --threads N", "number of threads to use during generation (default: %d)", params.cpuparams.n_threads });
16281633
options.push_back({ "*", "-C, --cpu-mask M", "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")"});
16291634
options.push_back({ "*", "-Cr, --cpu-range lo-hi", "range of CPUs for affinity. Complements --cpu-mask"});
1630-
options.push_back({ "*", " --cpu-strict", "use strict CPU placement (default: %u)\n", (unsigned) params.cpuparams.strict_cpu});
1635+
options.push_back({ "*", " --cpu-strict <0|1>", "use strict CPU placement (default: %u)\n", (unsigned) params.cpuparams.strict_cpu});
16311636
options.push_back({ "*", " --priority N", "set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority});
1632-
options.push_back({ "*", " --poll", "use polling to wait for work (default: %u)\n", (unsigned) params.cpuparams.poll});
1637+
options.push_back({ "*", " --poll <0|1>", "use polling to wait for work (default: %u)\n", (unsigned) params.cpuparams.poll});
16331638
options.push_back({ "*", "-tb, --threads-batch N", "number of threads to use during batch and prompt processing (default: same as --threads)" });
16341639
options.push_back({ "*", "-Cb, --cpu-mask-batch M", "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)"});
16351640
options.push_back({ "*", "-Crb, --cpu-range-batch lo-hi",
16361641
"ranges of CPUs for affinity. Complements --cpu-mask-batch"});
1637-
options.push_back({ "*", " --cpu-strict-batch", "use strict CPU placement (default: same as --cpu-strict)"});
1642+
options.push_back({ "*", " --cpu-strict-batch <0|1>",
1643+
"use strict CPU placement (default: same as --cpu-strict)"});
16381644
options.push_back({ "*", " --priority-batch N", "set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: --priority)"});
1639-
options.push_back({ "*", " --poll-batch", "use polling to wait for work (default: --poll)"});
1645+
options.push_back({ "*", " --poll-batch <0|1>", "use polling to wait for work (default: same as --poll"});
16401646
options.push_back({ "speculative", "-td, --threads-draft N", "number of threads to use during generation (default: same as --threads)" });
16411647
options.push_back({ "speculative", "-Cd, --cpu-mask-draft M", "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)"});
16421648
options.push_back({ "speculative", "-Crd, --cpu-range-draft lo-hi",
16431649
"Ranges of CPUs for affinity. Complements --cpu-mask-draft"});
1644-
options.push_back({ "speculative", " --cpu-strict-draft", "Use strict CPU placement for draft model (default: same as --cpu-strict)"});
1650+
options.push_back({ "speculative", " --cpu-strict-draft <0|1>",
1651+
"Use strict CPU placement for draft model (default: same as --cpu-strict)"});
16451652
options.push_back({ "speculative", " --priority-draft N", "Set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: same as --priority)"});
1646-
options.push_back({ "speculative", " --poll-draft", "Use polling to wait for draft model work (default: same as --poll])"});
1653+
options.push_back({ "speculative", " --poll-draft <0|1>", "Use polling to wait for draft model work (default: same as --poll])"});
16471654
options.push_back({ "speculative", "-tbd, --threads-batch-draft N",
16481655
"number of threads to use during batch and prompt processing (default: same as --threads-draft)" });
16491656
options.push_back({ "speculative", "-Cbd, --cpu-mask-batch-draft M",
16501657
"Draft model CPU affinity mask. Complements cpu-range-draft-batch (default: same as --cpu-mask-draft)"});
16511658
options.push_back({ "speculative", "-Crbd, --cpu-range-batch-draft lo-hi",
16521659
"Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch)"});
1653-
options.push_back({ "speculative", " --cpu-strict-batch-draft",
1660+
options.push_back({ "speculative", " --cpu-strict-batch-draft <0|1>",
16541661
"Use strict CPU placement for draft model (default: --cpu-strict-draft)"});
16551662
options.push_back({ "speculative", " --priority-batch-draft N",
16561663
"Set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: --priority-draft)"});
1657-
options.push_back({ "speculative", " --poll-batch-draft", "Use polling to wait for draft model work (default: --poll-draft)"});
1664+
options.push_back({ "speculative", " --poll-batch-draft <0|1>",
1665+
"Use polling to wait for draft model work (default: --poll-draft)"});
16581666

16591667
options.push_back({ "speculative", " --draft N", "number of tokens to draft for speculative decoding (default: %d)", params.n_draft });
16601668
options.push_back({ "speculative", "-ps, --p-split N", "speculative decoding split probability (default: %.1f)", (double)params.p_split });

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ struct cpu_params {
7373
bool mask_valid = false; // Default: any CPU
7474
int32_t priority = 0; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
7575
bool strict_cpu = false; // Use strict CPU placement
76-
bool poll = false; // Use polling (busywait) to wait for work
76+
bool poll = true; // Use polling (busywait) to wait for work (default matches OpenMP)
7777
};
7878

7979
struct gpt_params {

examples/llama-bench/llama-bench.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -513,9 +513,17 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
513513
}
514514
params.cpuparams.priority = std::stoul(argv[i]);
515515
} else if (arg == "--cpu-strict") {
516-
params.cpuparams.strict_cpu = true;
516+
if (++i >= argc) {
517+
invalid_param = true;
518+
break;
519+
}
520+
params.cpuparams.strict_cpu = std::stoul(argv[i]);
517521
} else if (arg == "--poll") {
518-
params.cpuparams.poll = true;
522+
if (++i >= argc) {
523+
invalid_param = true;
524+
break;
525+
}
526+
params.cpuparams.poll = std::stoul(argv[i]);
519527
} else if (arg == "-fa" || arg == "--flash-attn") {
520528
if (++i >= argc) {
521529
invalid_param = true;

0 commit comments

Comments
 (0)