Skip to content

Commit 4aa7a72

Browse files
max-krasnyansky authored and fmz committed
bench: create fresh threadpool for each test
For benchmarking it's better to start a fresh pool for each test, with exactly the number of threads needed for that test. Keeping a larger pool alive across tests is suboptimal (it adds scheduling load, etc.).
1 parent a95e1a2 commit 4aa7a72

File tree

1 file changed

+18
-24
lines changed

1 file changed

+18
-24
lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,6 @@ static void print_usage(int /* argc */, char ** argv) {
262262
printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
263263
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
264264
printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
265-
printf(" -mt, --max-threads <n> (default: %d)\n", cmd_params_defaults.cpuparams.n_threads);
266265
printf(" -C, --cpu-mask <hex> (default: 0x0)\n");
267266
printf(" --cpu-strict <0|1> (default: %d)\n", cmd_params_defaults.cpuparams.strict_cpu);
268267
printf(" --priority <0|1|2|3> (default: %d)\n", cmd_params_defaults.cpuparams.priority);
@@ -470,12 +469,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
470469
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
471470
else { invalid_param = true; break; }
472471
}
473-
} else if (arg == "-mt" || arg == "--max-threads") {
474-
if (++i >= argc) {
475-
invalid_param = true;
476-
break;
477-
}
478-
params.cpuparams.n_threads = std::stoi(argv[i]);
479472
} else if (arg == "-C" || arg == "--cpu-mask") {
480473
if (++i >= argc) {
481474
invalid_param = true;
@@ -1406,21 +1399,6 @@ int main(int argc, char ** argv) {
14061399

14071400
postprocess_cpu_params(params.cpuparams);
14081401

1409-
struct ggml_threadpool_params tpp;
1410-
tpp.n_threads = params.cpuparams.n_threads;
1411-
tpp.mask_specified = params.cpuparams.mask_valid;
1412-
tpp.strict_cpu = params.cpuparams.strict_cpu;
1413-
tpp.prio = params.cpuparams.priority;
1414-
tpp.poll = params.cpuparams.poll;
1415-
1416-
std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
1417-
1418-
struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
1419-
if (!threadpool) {
1420-
LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
1421-
exit(1);
1422-
}
1423-
14241402
for (const auto & inst : params_instances) {
14251403
// keep the same model between tests when possible
14261404
if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
@@ -1446,6 +1424,22 @@ int main(int argc, char ** argv) {
14461424
test t(inst, lmodel, ctx);
14471425

14481426
llama_kv_cache_clear(ctx);
1427+
1428+
struct ggml_threadpool_params tpp;
1429+
tpp.n_threads = t.n_threads;
1430+
tpp.mask_specified = params.cpuparams.mask_valid;
1431+
tpp.strict_cpu = params.cpuparams.strict_cpu;
1432+
tpp.prio = params.cpuparams.priority;
1433+
tpp.poll = params.cpuparams.poll;
1434+
1435+
std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
1436+
1437+
struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
1438+
if (!threadpool) {
1439+
LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
1440+
exit(1);
1441+
}
1442+
14491443
llama_attach_threadpool(ctx, threadpool);
14501444

14511445
// warmup run
@@ -1486,9 +1480,9 @@ int main(int argc, char ** argv) {
14861480
llama_print_timings(ctx);
14871481

14881482
llama_free(ctx);
1489-
}
14901483

1491-
ggml_release_threadpool(threadpool);
1484+
ggml_release_threadpool(threadpool);
1485+
}
14921486

14931487
llama_free_model(lmodel);
14941488

0 commit comments

Comments (0)