|
16 | 16 | #include <sstream>
|
17 | 17 | #include <string>
|
18 | 18 | #include <vector>
|
| 19 | +#include <thread> |
19 | 20 |
|
20 | 21 | #include "ggml.h"
|
21 | 22 | #include "llama.h"
|
@@ -240,6 +241,7 @@ struct cmd_params {
|
240 | 241 | ggml_numa_strategy numa;
|
241 | 242 | int reps;
|
242 | 243 | int prio;
|
| 244 | + int delay; |
243 | 245 | bool verbose;
|
244 | 246 | output_formats output_format;
|
245 | 247 | output_formats output_format_stderr;
|
@@ -270,6 +272,7 @@ static const cmd_params cmd_params_defaults = {
|
270 | 272 | /* numa */ GGML_NUMA_STRATEGY_DISABLED,
|
271 | 273 | /* reps */ 5,
|
272 | 274 | /* prio */ 0,
|
| 275 | + /* delay */ 0, |
273 | 276 | /* verbose */ false,
|
274 | 277 | /* output_format */ MARKDOWN,
|
275 | 278 | /* output_format_stderr */ NONE,
|
@@ -304,6 +307,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
304 | 307 | printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
|
305 | 308 | printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
306 | 309 | printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
|
| 310 | + printf(" --delay <0...N> (default: %d)\n", cmd_params_defaults.delay); |
307 | 311 | printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
|
308 | 312 | printf(" -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
|
309 | 313 | printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
@@ -351,6 +355,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
351 | 355 | params.reps = cmd_params_defaults.reps;
|
352 | 356 | params.numa = cmd_params_defaults.numa;
|
353 | 357 | params.prio = cmd_params_defaults.prio;
|
| 358 | + params.delay = cmd_params_defaults.delay; |
354 | 359 |
|
355 | 360 | for (int i = 1; i < argc; i++) {
|
356 | 361 | arg = argv[i];
|
@@ -467,12 +472,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
467 | 472 | }
|
468 | 473 | auto p = string_split<int>(argv[i], split_delim);
|
469 | 474 | params.poll.insert(params.poll.end(), p.begin(), p.end());
|
470 |
| - } else if (arg == "--prio") { |
471 |
| - if (++i >= argc) { |
472 |
| - invalid_param = true; |
473 |
| - break; |
474 |
| - } |
475 |
| - params.prio = std::stoi(argv[i]); |
476 | 475 | } else if (arg == "-ngl" || arg == "--n-gpu-layers") {
|
477 | 476 | if (++i >= argc) {
|
478 | 477 | invalid_param = true;
|
@@ -581,6 +580,18 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
581 | 580 | break;
|
582 | 581 | }
|
583 | 582 | params.reps = std::stoi(argv[i]);
|
| 583 | + } else if (arg == "--prio") { |
| 584 | + if (++i >= argc) { |
| 585 | + invalid_param = true; |
| 586 | + break; |
| 587 | + } |
| 588 | + params.prio = std::stoi(argv[i]); |
| 589 | + } else if (arg == "--delay") { |
| 590 | + if (++i >= argc) { |
| 591 | + invalid_param = true; |
| 592 | + break; |
| 593 | + } |
| 594 | + params.delay = std::stoi(argv[i]); |
584 | 595 | } else if (arg == "-o" || arg == "--output") {
|
585 | 596 | if (++i >= argc) {
|
586 | 597 | invalid_param = true;
|
@@ -1504,6 +1515,11 @@ int main(int argc, char ** argv) {
|
1504 | 1515 |
|
1505 | 1516 | llama_kv_cache_clear(ctx);
|
1506 | 1517 |
|
| 1518 | + // cool off before the test |
| 1519 | + if (params.delay) { |
| 1520 | + std::this_thread::sleep_for(std::chrono::seconds(params.delay)); |
| 1521 | + } |
| 1522 | + |
1507 | 1523 | struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
|
1508 | 1524 | if (!parse_cpu_mask(t.cpu_mask, tpp.cpumask)) {
|
1509 | 1525 | LOG_TEE("%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
|
|
0 commit comments