@@ -211,13 +211,19 @@ static std::vector<int> parse_int_range(const std::string & s) {
211
211
for (int i = first; i <= last;) {
212
212
result.push_back (i);
213
213
214
+ int prev_i = i;
215
+
214
216
if (op == ' +' ) {
215
217
i += step;
216
218
} else if (op == ' *' ) {
217
219
i *= step;
218
220
} else {
219
221
throw std::invalid_argument (" invalid range format" );
220
222
}
223
+
224
+ if (i <= prev_i) {
225
+ throw std::invalid_argument (" invalid range" );
226
+ }
221
227
}
222
228
search_start = match.suffix ().first ;
223
229
}
@@ -239,6 +245,7 @@ struct cmd_params {
239
245
std::vector<int > n_ubatch;
240
246
std::vector<ggml_type> type_k;
241
247
std::vector<ggml_type> type_v;
248
+ std::vector<float > defrag_thold;
242
249
std::vector<int > n_threads;
243
250
std::vector<std::string> cpu_mask;
244
251
std::vector<bool > cpu_strict;
@@ -274,6 +281,7 @@ static const cmd_params cmd_params_defaults = {
274
281
/* n_ubatch */ { 512 },
275
282
/* type_k */ { GGML_TYPE_F16 },
276
283
/* type_v */ { GGML_TYPE_F16 },
284
+ /* defrag_thold */ { -1 .0f },
277
285
/* n_threads */ { cpu_get_num_math () },
278
286
/* cpu_mask */ { " 0x0" },
279
287
/* cpu_strict */ { false },
@@ -335,6 +343,8 @@ static void print_usage(int /* argc */, char ** argv) {
335
343
join (transform_to_str (cmd_params_defaults.type_k , ggml_type_name), " ," ).c_str ());
336
344
printf (" -ctv, --cache-type-v <t> (default: %s)\n " ,
337
345
join (transform_to_str (cmd_params_defaults.type_v , ggml_type_name), " ," ).c_str ());
346
+ printf (" -dt, --defrag-thold <f> (default: %s)\n " ,
347
+ join (cmd_params_defaults.defrag_thold , " ," ).c_str ());
338
348
printf (" -t, --threads <n> (default: %s)\n " ,
339
349
join (cmd_params_defaults.n_threads , " ," ).c_str ());
340
350
printf (" -C, --cpu-mask <hex,hex> (default: %s)\n " ,
@@ -368,7 +378,7 @@ static void print_usage(int /* argc */, char ** argv) {
368
378
printf (
369
379
" Multiple values can be given for each parameter by separating them with ','\n "
370
380
" or by specifying the parameter multiple times. Ranges can be given as\n "
371
- " 'start-end ' or 'start-end +step' or 'start-end *mult'.\n " );
381
+ " 'first-last ' or 'first-last +step' or 'first-last *mult'.\n " );
372
382
}
373
383
374
384
static ggml_type ggml_type_from_name (const std::string & s) {
@@ -519,6 +529,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
519
529
break ;
520
530
}
521
531
params.type_v .insert (params.type_v .end (), types.begin (), types.end ());
532
+ } else if (arg == " -dt" || arg == " --defrag-thold" ) {
533
+ if (++i >= argc) {
534
+ invalid_param = true ;
535
+ break ;
536
+ }
537
+ auto p = string_split<float >(argv[i], split_delim);
538
+ params.defrag_thold .insert (params.defrag_thold .end (), p.begin (), p.end ());
522
539
} else if (arg == " -t" || arg == " --threads" ) {
523
540
if (++i >= argc) {
524
541
invalid_param = true ;
@@ -825,6 +842,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
825
842
if (params.type_v .empty ()) {
826
843
params.type_v = cmd_params_defaults.type_v ;
827
844
}
845
+ if (params.defrag_thold .empty ()) {
846
+ params.defrag_thold = cmd_params_defaults.defrag_thold ;
847
+ }
828
848
if (params.n_gpu_layers .empty ()) {
829
849
params.n_gpu_layers = cmd_params_defaults.n_gpu_layers ;
830
850
}
@@ -883,6 +903,7 @@ struct cmd_params_instance {
883
903
int n_ubatch;
884
904
ggml_type type_k;
885
905
ggml_type type_v;
906
+ float defrag_thold;
886
907
int n_threads;
887
908
std::string cpu_mask;
888
909
bool cpu_strict;
@@ -959,15 +980,16 @@ struct cmd_params_instance {
959
980
llama_context_params to_llama_cparams () const {
960
981
llama_context_params cparams = llama_context_default_params ();
961
982
962
- cparams.n_ctx = n_prompt + n_gen + n_depth;
963
- cparams.n_batch = n_batch;
964
- cparams.n_ubatch = n_ubatch;
965
- cparams.type_k = type_k;
966
- cparams.type_v = type_v;
967
- cparams.offload_kqv = !no_kv_offload;
968
- cparams.flash_attn = flash_attn;
969
- cparams.embeddings = embeddings;
970
- cparams.op_offload = !no_op_offload;
983
+ cparams.n_ctx = n_prompt + n_gen + n_depth;
984
+ cparams.n_batch = n_batch;
985
+ cparams.n_ubatch = n_ubatch;
986
+ cparams.type_k = type_k;
987
+ cparams.type_v = type_v;
988
+ cparams.defrag_thold = defrag_thold;
989
+ cparams.offload_kqv = !no_kv_offload;
990
+ cparams.flash_attn = flash_attn;
991
+ cparams.embeddings = embeddings;
992
+ cparams.op_offload = !no_op_offload;
971
993
972
994
return cparams;
973
995
}
@@ -992,6 +1014,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
992
1014
for (const auto & nub : params.n_ubatch )
993
1015
for (const auto & tk : params.type_k )
994
1016
for (const auto & tv : params.type_v )
1017
+ for (const auto & defrag_thold : params.defrag_thold )
995
1018
for (const auto & nkvo : params.no_kv_offload )
996
1019
for (const auto & fa : params.flash_attn )
997
1020
for (const auto & nt : params.n_threads )
@@ -1012,6 +1035,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
1012
1035
/* .n_ubatch = */ nub,
1013
1036
/* .type_k = */ tk,
1014
1037
/* .type_v = */ tv,
1038
+ /* .defrag_thold = */ defrag_thold,
1015
1039
/* .n_threads = */ nt,
1016
1040
/* .cpu_mask = */ cm,
1017
1041
/* .cpu_strict = */ cs,
@@ -1044,6 +1068,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
1044
1068
/* .n_ubatch = */ nub,
1045
1069
/* .type_k = */ tk,
1046
1070
/* .type_v = */ tv,
1071
+ /* .defrag_thold = */ defrag_thold,
1047
1072
/* .n_threads = */ nt,
1048
1073
/* .cpu_mask = */ cm,
1049
1074
/* .cpu_strict = */ cs,
@@ -1076,6 +1101,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
1076
1101
/* .n_ubatch = */ nub,
1077
1102
/* .type_k = */ tk,
1078
1103
/* .type_v = */ tv,
1104
+ /* .defrag_thold = */ defrag_thold,
1079
1105
/* .n_threads = */ nt,
1080
1106
/* .cpu_mask = */ cm,
1081
1107
/* .cpu_strict = */ cs,
@@ -1117,6 +1143,7 @@ struct test {
1117
1143
int poll;
1118
1144
ggml_type type_k;
1119
1145
ggml_type type_v;
1146
+ float defrag_thold;
1120
1147
int n_gpu_layers;
1121
1148
llama_split_mode split_mode;
1122
1149
int main_gpu;
@@ -1151,6 +1178,7 @@ struct test {
1151
1178
poll = inst.poll ;
1152
1179
type_k = inst.type_k ;
1153
1180
type_v = inst.type_v ;
1181
+ defrag_thold = inst.defrag_thold ;
1154
1182
n_gpu_layers = inst.n_gpu_layers ;
1155
1183
split_mode = inst.split_mode ;
1156
1184
main_gpu = inst.main_gpu ;
@@ -1206,6 +1234,7 @@ struct test {
1206
1234
" model_type" , " model_size" , " model_n_params" , " n_batch" , " n_ubatch" , " n_threads" ,
1207
1235
" cpu_mask" , " cpu_strict" , " poll" , " type_k" , " type_v" , " n_gpu_layers" ,
1208
1236
" split_mode" , " main_gpu" , " no_kv_offload" , " flash_attn" , " tensor_split" , " tensor_buft_overrides" ,
1237
+ " defrag_thold" ,
1209
1238
" use_mmap" , " embeddings" , " no_op_offload" , " n_prompt" , " n_gen" , " n_depth" , " test_time" ,
1210
1239
" avg_ns" , " stddev_ns" , " avg_ts" , " stddev_ts" ,
1211
1240
};
@@ -1225,7 +1254,7 @@ struct test {
1225
1254
field == " use_mmap" || field == " embeddings" ) {
1226
1255
return BOOL;
1227
1256
}
1228
- if (field == " avg_ts" || field == " stddev_ts" ) {
1257
+ if (field == " avg_ts" || field == " stddev_ts" || field == " defrag_thold " ) {
1229
1258
return FLOAT;
1230
1259
}
1231
1260
return STRING;
@@ -1292,6 +1321,7 @@ struct test {
1292
1321
std::to_string (flash_attn),
1293
1322
tensor_split_str,
1294
1323
tensor_buft_overrides_str,
1324
+ std::to_string (defrag_thold),
1295
1325
std::to_string (use_mmap),
1296
1326
std::to_string (embeddings),
1297
1327
std::to_string (no_op_offload),
@@ -1558,6 +1588,9 @@ struct markdown_printer : public printer {
1558
1588
if (params.type_v .size () > 1 || params.type_v != cmd_params_defaults.type_v ) {
1559
1589
fields.emplace_back (" type_v" );
1560
1590
}
1591
+ if (params.defrag_thold .size () > 1 || params.defrag_thold != cmd_params_defaults.defrag_thold ) {
1592
+ fields.emplace_back (" defrag_thold" );
1593
+ }
1561
1594
if (params.main_gpu .size () > 1 || params.main_gpu != cmd_params_defaults.main_gpu ) {
1562
1595
fields.emplace_back (" main_gpu" );
1563
1596
}
0 commit comments