@@ -262,7 +262,6 @@ static void print_usage(int /* argc */, char ** argv) {
262
262
printf (" -fa, --flash-attn <0|1> (default: %s)\n " , join (cmd_params_defaults.flash_attn , " ," ).c_str ());
263
263
printf (" -mmp, --mmap <0|1> (default: %s)\n " , join (cmd_params_defaults.use_mmap , " ," ).c_str ());
264
264
printf (" --numa <distribute|isolate|numactl> (default: disabled)\n " );
265
- printf (" -mt, --max-threads <n> (default: %d)\n " , cmd_params_defaults.cpuparams .n_threads );
266
265
printf (" -C, --cpu-mask <hex> (default: 0x0)\n " );
267
266
printf (" --cpu-strict <0|1> (default: %d)\n " , cmd_params_defaults.cpuparams .strict_cpu );
268
267
printf (" --priority <0|1|2|3> (default: %d)\n " , cmd_params_defaults.cpuparams .priority );
@@ -470,12 +469,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
470
469
else if (value == " numactl" ) { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
471
470
else { invalid_param = true ; break ; }
472
471
}
473
- } else if (arg == " -mt" || arg == " --max-threads" ) {
474
- if (++i >= argc) {
475
- invalid_param = true ;
476
- break ;
477
- }
478
- params.cpuparams .n_threads = std::stoi (argv[i]);
479
472
} else if (arg == " -C" || arg == " --cpu-mask" ) {
480
473
if (++i >= argc) {
481
474
invalid_param = true ;
@@ -1406,21 +1399,6 @@ int main(int argc, char ** argv) {
1406
1399
1407
1400
postprocess_cpu_params (params.cpuparams );
1408
1401
1409
- struct ggml_threadpool_params tpp;
1410
- tpp.n_threads = params.cpuparams .n_threads ;
1411
- tpp.mask_specified = params.cpuparams .mask_valid ;
1412
- tpp.strict_cpu = params.cpuparams .strict_cpu ;
1413
- tpp.prio = params.cpuparams .priority ;
1414
- tpp.poll = params.cpuparams .poll ;
1415
-
1416
- std::memcpy (&tpp.cpumask [0 ], &params.cpuparams .cpumask [0 ], GGML_MAX_N_THREADS);
1417
-
1418
- struct ggml_compute_threadpool * threadpool = ggml_create_threadpool (&tpp);
1419
- if (!threadpool) {
1420
- LOG_TEE (" %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1421
- exit (1 );
1422
- }
1423
-
1424
1402
for (const auto & inst : params_instances) {
1425
1403
// keep the same model between tests when possible
1426
1404
if (!lmodel || !prev_inst || !inst.equal_mparams (*prev_inst)) {
@@ -1446,6 +1424,22 @@ int main(int argc, char ** argv) {
1446
1424
test t (inst, lmodel, ctx);
1447
1425
1448
1426
llama_kv_cache_clear (ctx);
1427
+
1428
+ struct ggml_threadpool_params tpp;
1429
+ tpp.n_threads = t.n_threads ;
1430
+ tpp.mask_specified = params.cpuparams .mask_valid ;
1431
+ tpp.strict_cpu = params.cpuparams .strict_cpu ;
1432
+ tpp.prio = params.cpuparams .priority ;
1433
+ tpp.poll = params.cpuparams .poll ;
1434
+
1435
+ std::memcpy (&tpp.cpumask [0 ], &params.cpuparams .cpumask [0 ], GGML_MAX_N_THREADS);
1436
+
1437
+ struct ggml_compute_threadpool * threadpool = ggml_create_threadpool (&tpp);
1438
+ if (!threadpool) {
1439
+ LOG_TEE (" %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1440
+ exit (1 );
1441
+ }
1442
+
1449
1443
llama_attach_threadpool (ctx, threadpool);
1450
1444
1451
1445
// warmup run
@@ -1486,9 +1480,9 @@ int main(int argc, char ** argv) {
1486
1480
llama_print_timings (ctx);
1487
1481
1488
1482
llama_free (ctx);
1489
- }
1490
1483
1491
- ggml_release_threadpool (threadpool);
1484
+ ggml_release_threadpool (threadpool);
1485
+ }
1492
1486
1493
1487
llama_free_model (lmodel);
1494
1488
0 commit comments