24
24
from concurrent .futures import ProcessPoolExecutor , ThreadPoolExecutor
25
25
from dataclasses import dataclass
26
26
from pathlib import Path
27
- from typing import TYPE_CHECKING , Any , Callable , ClassVar , IO , Iterable , Literal , Protocol , TypeVar , runtime_checkable
27
+ from typing import TYPE_CHECKING , Any , Callable , ClassVar , IO , Iterable , Literal , Protocol , TypeVar , runtime_checkable , Optional
28
28
29
29
import numpy as np
30
30
from sentencepiece import SentencePieceProcessor
@@ -341,10 +341,46 @@ def load(model_plus: ModelPlus) -> Params:
341
341
return params
342
342
343
343
344
@dataclass
class Metadata:
    """Optional model provenance metadata, mirrored into GGUF ``general.*`` KV fields."""
    name: Optional[str] = None
    author: Optional[str] = None
    version: Optional[str] = None
    url: Optional[str] = None
    description: Optional[str] = None
    licence: Optional[str] = None
    source_url: Optional[str] = None
    source_hf_repo: Optional[str] = None

    @staticmethod
    def load(metadata_path: Path) -> "Metadata":
        """Load metadata from a JSON file.

        Returns an empty Metadata instance when *metadata_path* is None or
        does not exist, so callers can always pass the result around safely.
        """
        if metadata_path is None or not metadata_path.exists():
            return Metadata()

        with open(metadata_path, 'r') as file:
            data = json.load(file)

        # Create a new Metadata instance
        metadata = Metadata()

        # Assigning values to Metadata attributes if they exist in the JSON file
        metadata.name = data.get("general.name")
        metadata.author = data.get("general.author")
        metadata.version = data.get("general.version")
        metadata.url = data.get("general.url")
        metadata.description = data.get("general.description")
        # BUG FIX: this previously assigned `metadata.license`, which created a
        # stray attribute and left the declared `licence` field (the one read
        # by add_meta_model / gguf.add_licence) permanently None.
        metadata.licence = data.get("general.license")
        metadata.source_url = data.get("general.source_url")
        metadata.source_hf_repo = data.get("general.source_hf_repo")

        return metadata
378
+
344
379
#
345
380
# vocab
346
381
#
347
382
383
+
348
384
@runtime_checkable
349
385
class BaseVocab (Protocol ):
350
386
tokenizer_model : ClassVar [str ]
@@ -1062,21 +1098,42 @@ class OutputFile:
1062
1098
def __init__ (self , fname_out : Path , endianess :gguf .GGUFEndian = gguf .GGUFEndian .LITTLE ):
1063
1099
self .gguf = gguf .GGUFWriter (fname_out , gguf .MODEL_ARCH_NAMES [ARCH ], endianess = endianess )
1064
1100
1065
- def add_meta_arch (self , params : Params ) -> None :
1101
+ def add_meta_model (self , params : Params , metadata : Metadata ) -> None :
1102
+ # Metadata About The Model And It's Provenence
1066
1103
name = "LLaMA"
1067
-
1068
- # TODO: better logic to determine model name
1069
- if params .n_ctx == 4096 :
1070
- name = "LLaMA v2"
1104
+ if metadata is not None and metadata .name is not None :
1105
+ name = metadata .name
1071
1106
elif params .path_model is not None :
1072
- name = str (params .path_model .parent ).split ('/' )[- 1 ]
1073
-
1074
- self .gguf .add_name (name )
1075
- self .gguf .add_vocab_size (params .n_vocab )
1076
- self .gguf .add_context_length (params .n_ctx )
1077
- self .gguf .add_embedding_length (params .n_embd )
1078
- self .gguf .add_block_count (params .n_layer )
1079
- self .gguf .add_feed_forward_length (params .n_ff )
1107
+ name = str (params .path_model .parent ).split ("/" )[- 1 ]
1108
+ elif params .n_ctx == 4096 :
1109
+ # Heuristic detection of LLaMA v2 model
1110
+ name = "LLaMA v2"
1111
+
1112
+ self .gguf .add_name (name )
1113
+
1114
+ if metadata is not None :
1115
+ if metadata .author is not None :
1116
+ self .gguf .add_author (metadata .author )
1117
+ if metadata .version is not None :
1118
+ self .gguf .add_version (metadata .version )
1119
+ if metadata .url is not None :
1120
+ self .gguf .add_url (metadata .url )
1121
+ if metadata .description is not None :
1122
+ self .gguf .add_description (metadata .description )
1123
+ if metadata .licence is not None :
1124
+ self .gguf .add_licence (metadata .licence )
1125
+ if metadata .source_url is not None :
1126
+ self .gguf .add_source_url (metadata .source_url )
1127
+ if metadata .source_hf_repo is not None :
1128
+ self .gguf .add_source_hf_repo (metadata .source_hf_repo )
1129
+
1130
+ def add_meta_arch (self , params : Params ) -> None :
1131
+ # Metadata About The Neural Architecture Itself
1132
+ self .gguf .add_vocab_size (params .n_vocab )
1133
+ self .gguf .add_context_length (params .n_ctx )
1134
+ self .gguf .add_embedding_length (params .n_embd )
1135
+ self .gguf .add_block_count (params .n_layer )
1136
+ self .gguf .add_feed_forward_length (params .n_ff )
1080
1137
self .gguf .add_rope_dimension_count (params .n_embd // params .n_head )
1081
1138
self .gguf .add_head_count (params .n_head )
1082
1139
self .gguf .add_head_count_kv (params .n_head_kv )
@@ -1179,13 +1236,14 @@ def close(self) -> None:
1179
1236
@staticmethod
1180
1237
def write_vocab_only (
1181
1238
fname_out : Path , params : Params , vocab : Vocab , svocab : gguf .SpecialVocab ,
1182
- endianess : gguf .GGUFEndian = gguf .GGUFEndian .LITTLE , pad_vocab : bool = False ,
1239
+ endianess : gguf .GGUFEndian = gguf .GGUFEndian .LITTLE , pad_vocab : bool = False , metadata : Metadata = None ,
1183
1240
) -> None :
1184
1241
check_vocab_size (params , vocab , pad_vocab = pad_vocab )
1185
1242
1186
1243
of = OutputFile (fname_out , endianess = endianess )
1187
1244
1188
1245
# meta data
1246
+ of .add_meta_model (params , metadata )
1189
1247
of .add_meta_arch (params )
1190
1248
of .add_meta_vocab (vocab )
1191
1249
of .add_meta_special_vocab (svocab )
@@ -1212,12 +1270,14 @@ def write_all(
1212
1270
fname_out : Path , ftype : GGMLFileType , params : Params , model : LazyModel , vocab : BaseVocab , svocab : gguf .SpecialVocab ,
1213
1271
concurrency : int = DEFAULT_CONCURRENCY , endianess : gguf .GGUFEndian = gguf .GGUFEndian .LITTLE ,
1214
1272
pad_vocab : bool = False ,
1273
+ metadata : Metadata = None ,
1215
1274
) -> None :
1216
1275
check_vocab_size (params , vocab , pad_vocab = pad_vocab )
1217
1276
1218
1277
of = OutputFile (fname_out , endianess = endianess )
1219
1278
1220
1279
# meta data
1280
+ of .add_meta_model (params , metadata )
1221
1281
of .add_meta_arch (params )
1222
1282
if isinstance (vocab , Vocab ):
1223
1283
of .add_meta_vocab (vocab )
@@ -1253,6 +1313,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
1253
1313
raise ValueError (f"Unexpected combination of types: { name_to_type } " )
1254
1314
1255
1315
1316
def model_parameter_count(model: LazyModel) -> int:
    """Return the total number of scalar weights across all tensors in *model*.

    A tensor's parameter count is the product of its dimensions; an empty
    shape (scalar tensor) contributes 1, and an empty model yields 0.
    """
    total_model_parameters = 0
    # Tensor names and positions are irrelevant here; only shapes matter,
    # so iterate values directly (the original looped enumerate(model.items())
    # and discarded both the index and the name).
    for lazy_tensor in model.values():
        sum_weights_in_tensor = 1
        for dim in lazy_tensor.shape:
            sum_weights_in_tensor *= dim
        total_model_parameters += sum_weights_in_tensor
    return total_model_parameters
1326
def model_parameter_count_rounded_notation(model_params_count: int) -> str:
    """Format a raw parameter count as a short human-readable string.

    e.g. 7_000_000_000 -> "7B", 13_000_000 -> "13M".

    BUG FIX: thresholds now use >= instead of >, so exact powers render at the
    expected scale (1_000_000 -> "1M" rather than "1000K", 1e9 -> "1B", etc.).
    """
    if model_params_count >= 1e12:
        # Trillions Of Parameters
        scaled_model_params = model_params_count * 1e-12
        scale_suffix = "T"
    elif model_params_count >= 1e9:
        # Billions Of Parameters
        scaled_model_params = model_params_count * 1e-9
        scale_suffix = "B"
    elif model_params_count >= 1e6:
        # Millions Of Parameters
        scaled_model_params = model_params_count * 1e-6
        scale_suffix = "M"
    else:
        # Thousands Of Parameters
        scaled_model_params = model_params_count * 1e-3
        scale_suffix = "K"

    return f"{round(scaled_model_params)}{scale_suffix}"
1256
1347
def convert_to_output_type (model : LazyModel , output_type : GGMLFileType ) -> LazyModel :
1257
1348
return {name : tensor .astype (output_type .type_for_tensor (name , tensor ))
1258
1349
for (name , tensor ) in model .items ()}
@@ -1432,13 +1523,30 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
1432
1523
return vocab , special_vocab
1433
1524
1434
1525
1435
- def default_outfile (model_paths : list [Path ], file_type : GGMLFileType ) -> Path :
1436
- namestr = {
1437
- GGMLFileType .AllF32 : "f32 " ,
1438
- GGMLFileType .MostlyF16 : "f16 " ,
1439
- GGMLFileType .MostlyQ8_0 :"q8_0 " ,
1526
+ def default_outfile (model_paths : list [Path ], file_type : GGMLFileType , params : Params , model_params_count : int , metadata : Metadata ) -> Path :
1527
+ quantization = {
1528
+ GGMLFileType .AllF32 : "F32 " ,
1529
+ GGMLFileType .MostlyF16 : "F16 " ,
1530
+ GGMLFileType .MostlyQ8_0 : "Q8_0 " ,
1440
1531
}[file_type ]
1441
- ret = model_paths [0 ].parent / f"ggml-model-{ namestr } .gguf"
1532
+
1533
+ parameters = model_parameter_count_rounded_notation (model_params_count )
1534
+
1535
+ expert_count = ""
1536
+ if params .n_experts is not None :
1537
+ expert_count = f"{ params .n_experts } x"
1538
+
1539
+ version = ""
1540
+ if metadata is not None and metadata .version is not None :
1541
+ version = f"-{ metadata .version } "
1542
+
1543
+ name = "ggml-model"
1544
+ if metadata is not None and metadata .name is not None :
1545
+ name = metadata .name
1546
+ elif params .path_model is not None :
1547
+ name = params .path_model .name
1548
+
1549
+ ret = model_paths [0 ].parent / f"{ name } { version } -{ expert_count } { parameters } -{ quantization } .gguf"
1442
1550
if ret in model_paths :
1443
1551
logger .error (
1444
1552
f"Error: Default output path ({ ret } ) would overwrite the input. "
@@ -1476,6 +1584,7 @@ def main(args_in: list[str] | None = None) -> None:
1476
1584
parser .add_argument ("--pad-vocab" , action = "store_true" , help = "add pad tokens when model vocab expects more than tokenizer metadata provides" )
1477
1585
parser .add_argument ("--skip-unknown" , action = "store_true" , help = "skip unknown tensor names instead of failing" )
1478
1586
parser .add_argument ("--verbose" , action = "store_true" , help = "increase output verbosity" )
1587
+ parser .add_argument ("--metadata" , type = Path , help = "Specify the path for a metadata file" )
1479
1588
1480
1589
args = parser .parse_args (args_in )
1481
1590
@@ -1487,6 +1596,8 @@ def main(args_in: list[str] | None = None) -> None:
1487
1596
else :
1488
1597
logging .basicConfig (level = logging .INFO )
1489
1598
1599
+ metadata = Metadata .load (args .metadata )
1600
+
1490
1601
if args .no_vocab and args .vocab_only :
1491
1602
raise ValueError ("--vocab-only does not make sense with --no-vocab" )
1492
1603
@@ -1500,6 +1611,9 @@ def main(args_in: list[str] | None = None) -> None:
1500
1611
else :
1501
1612
model_plus = ModelPlus (model = {}, paths = [args .model / 'dummy' ], format = 'none' , vocab = None )
1502
1613
1614
+ model_params_count = model_parameter_count (model_plus .model )
1615
+ logger .info (f"model parameters count : { model_params_count } ({ model_parameter_count_rounded_notation (model_params_count )} )" )
1616
+
1503
1617
if args .dump :
1504
1618
do_dump_model (model_plus )
1505
1619
return
@@ -1540,26 +1654,30 @@ def main(args_in: list[str] | None = None) -> None:
1540
1654
raise ValueError ("need --outfile if using --vocab-only" )
1541
1655
outfile = args .outfile
1542
1656
OutputFile .write_vocab_only (outfile , params , vocab , special_vocab ,
1543
- endianess = endianess , pad_vocab = args .pad_vocab )
1657
+ endianess = endianess , pad_vocab = args .pad_vocab , metadata = metadata )
1544
1658
logger .info (f"Wrote { outfile } " )
1545
1659
return
1546
1660
1547
1661
if model_plus .vocab is not None and args .vocab_dir is None and not args .no_vocab :
1548
1662
vocab = model_plus .vocab
1549
1663
1550
1664
logger .info (f"Vocab info: { vocab } " )
1665
+ special_vocab = gguf .SpecialVocab (model_plus .paths [0 ].parent ,
1666
+ load_merges = True ,
1667
+ n_vocab = vocab .vocab_size )
1668
+
1551
1669
logger .info (f"Special vocab info: { special_vocab } " )
1552
1670
model = model_plus .model
1553
1671
model = convert_model_names (model , params , args .skip_unknown )
1554
1672
ftype = pick_output_type (model , args .outtype )
1555
1673
model = convert_to_output_type (model , ftype )
1556
- outfile = args .outfile or default_outfile (model_plus .paths , ftype )
1674
+ outfile = args .outfile or default_outfile (model_plus .paths , ftype , params , model_params_count , metadata )
1557
1675
1558
1676
params .ftype = ftype
1559
1677
logger .info (f"Writing { outfile } , format { ftype } " )
1560
1678
1561
1679
OutputFile .write_all (outfile , ftype , params , model , vocab , special_vocab ,
1562
- concurrency = args .concurrency , endianess = endianess , pad_vocab = args .pad_vocab )
1680
+ concurrency = args .concurrency , endianess = endianess , pad_vocab = args .pad_vocab , metadata = metadata )
1563
1681
logger .info (f"Wrote { outfile } " )
1564
1682
1565
1683
0 commit comments