Skip to content

Commit 3bcc4de

Browse files
llama-bench: add --delay option for a cool-off period between tests
This helps with long-running tests on thermally limited platforms (phones, laptops, etc.). The --delay option (disabled by default) sleeps for N seconds before starting each test.
1 parent 8d5ab9a commit 3bcc4de

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sstream>
1717
#include <string>
1818
#include <vector>
19+
#include <thread>
1920

2021
#include "ggml.h"
2122
#include "llama.h"
@@ -240,6 +241,7 @@ struct cmd_params {
240241
ggml_numa_strategy numa;
241242
int reps;
242243
int prio;
244+
int delay;
243245
bool verbose;
244246
output_formats output_format;
245247
output_formats output_format_stderr;
@@ -270,6 +272,7 @@ static const cmd_params cmd_params_defaults = {
270272
/* numa */ GGML_NUMA_STRATEGY_DISABLED,
271273
/* reps */ 5,
272274
/* prio */ 0,
275+
/* delay */ 0,
273276
/* verbose */ false,
274277
/* output_format */ MARKDOWN,
275278
/* output_format_stderr */ NONE,
@@ -304,6 +307,7 @@ static void print_usage(int /* argc */, char ** argv) {
304307
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
305308
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
306309
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
310+
printf(" --delay <0...N> (default: %d)\n", cmd_params_defaults.delay);
307311
printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
308312
printf(" -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
309313
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
@@ -351,6 +355,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
351355
params.reps = cmd_params_defaults.reps;
352356
params.numa = cmd_params_defaults.numa;
353357
params.prio = cmd_params_defaults.prio;
358+
params.delay = cmd_params_defaults.delay;
354359

355360
for (int i = 1; i < argc; i++) {
356361
arg = argv[i];
@@ -467,12 +472,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
467472
}
468473
auto p = string_split<int>(argv[i], split_delim);
469474
params.poll.insert(params.poll.end(), p.begin(), p.end());
470-
} else if (arg == "--prio") {
471-
if (++i >= argc) {
472-
invalid_param = true;
473-
break;
474-
}
475-
params.prio = std::stoi(argv[i]);
476475
} else if (arg == "-ngl" || arg == "--n-gpu-layers") {
477476
if (++i >= argc) {
478477
invalid_param = true;
@@ -581,6 +580,18 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
581580
break;
582581
}
583582
params.reps = std::stoi(argv[i]);
583+
} else if (arg == "--prio") {
584+
if (++i >= argc) {
585+
invalid_param = true;
586+
break;
587+
}
588+
params.prio = std::stoi(argv[i]);
589+
} else if (arg == "--delay") {
590+
if (++i >= argc) {
591+
invalid_param = true;
592+
break;
593+
}
594+
params.delay = std::stoi(argv[i]);
584595
} else if (arg == "-o" || arg == "--output") {
585596
if (++i >= argc) {
586597
invalid_param = true;
@@ -1504,6 +1515,11 @@ int main(int argc, char ** argv) {
15041515

15051516
llama_kv_cache_clear(ctx);
15061517

1518+
// cool off before the test
1519+
if (params.delay) {
1520+
std::this_thread::sleep_for(std::chrono::seconds(params.delay));
1521+
}
1522+
15071523
struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
15081524
if (!parse_cpu_mask(t.cpu_mask, tpp.cpumask)) {
15091525
LOG_TEE("%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());

0 commit comments

Comments
 (0)