
Commit 7c6d166

convert.py: Outfile default name change and additional metadata support

1 parent 628b299

1 file changed: +142 −24 lines

convert.py

@@ -24,7 +24,7 @@
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable, Optional
 
 import numpy as np
 from sentencepiece import SentencePieceProcessor
@@ -341,10 +341,46 @@ def load(model_plus: ModelPlus) -> Params:
         return params
 
 
+@dataclass
+class Metadata:
+    name: Optional[str] = None
+    author: Optional[str] = None
+    version: Optional[str] = None
+    url: Optional[str] = None
+    description: Optional[str] = None
+    licence: Optional[str] = None
+    source_url: Optional[str] = None
+    source_hf_repo: Optional[str] = None
+
+    @staticmethod
+    def load(metadata_path: Optional[Path]) -> "Metadata":
+        if metadata_path is None or not metadata_path.exists():
+            return Metadata()
+
+        with open(metadata_path, 'r') as file:
+            data = json.load(file)
+
+        # Create a new Metadata instance
+        metadata = Metadata()
+
+        # Assign values to the Metadata attributes if present in the JSON file
+        metadata.name = data.get("general.name")
+        metadata.author = data.get("general.author")
+        metadata.version = data.get("general.version")
+        metadata.url = data.get("general.url")
+        metadata.description = data.get("general.description")
+        metadata.licence = data.get("general.license")
+        metadata.source_url = data.get("general.source_url")
+        metadata.source_hf_repo = data.get("general.source_hf_repo")
+
+        return metadata
+
+
 #
 # vocab
 #
 
+
 @runtime_checkable
 class BaseVocab(Protocol):
     tokenizer_model: ClassVar[str]
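For reference, here is a minimal sketch of a metadata file this loader accepts, round-tripped through Metadata.load. The file name and all field values are invented for illustration; every key is optional:

    import json
    from pathlib import Path

    # Hypothetical metadata file contents.
    example = {
        "general.name": "My Model",
        "general.author": "Jane Doe",
        "general.version": "v1.0",
        "general.url": "https://example.com/my-model",
        "general.description": "An example fine-tune.",
        "general.license": "apache-2.0",
        "general.source_url": "https://example.com/my-model/weights",
        "general.source_hf_repo": "jane/my-model",
    }
    path = Path("metadata.json")
    path.write_text(json.dumps(example, indent=2))

    # Keys missing from the JSON simply stay None on the returned instance.
    metadata = Metadata.load(path)
    assert metadata.name == "My Model"
    assert metadata.licence == "apache-2.0"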
@@ -1062,21 +1098,42 @@ class OutputFile:
     def __init__(self, fname_out: Path, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
         self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
 
-    def add_meta_arch(self, params: Params) -> None:
+    def add_meta_model(self, params: Params, metadata: Metadata | None) -> None:
+        # Metadata About The Model And Its Provenance
         name = "LLaMA"
-
-        # TODO: better logic to determine model name
-        if params.n_ctx == 4096:
-            name = "LLaMA v2"
+        if metadata is not None and metadata.name is not None:
+            name = metadata.name
         elif params.path_model is not None:
-            name = str(params.path_model.parent).split('/')[-1]
-
-        self.gguf.add_name                (name)
-        self.gguf.add_vocab_size          (params.n_vocab)
-        self.gguf.add_context_length      (params.n_ctx)
-        self.gguf.add_embedding_length    (params.n_embd)
-        self.gguf.add_block_count         (params.n_layer)
-        self.gguf.add_feed_forward_length (params.n_ff)
+            name = str(params.path_model.parent).split("/")[-1]
+        elif params.n_ctx == 4096:
+            # Heuristic detection of a LLaMA v2 model
+            name = "LLaMA v2"
+
+        self.gguf.add_name(name)
+
+        if metadata is not None:
+            if metadata.author is not None:
+                self.gguf.add_author(metadata.author)
+            if metadata.version is not None:
+                self.gguf.add_version(metadata.version)
+            if metadata.url is not None:
+                self.gguf.add_url(metadata.url)
+            if metadata.description is not None:
+                self.gguf.add_description(metadata.description)
+            if metadata.licence is not None:
+                self.gguf.add_licence(metadata.licence)
+            if metadata.source_url is not None:
+                self.gguf.add_source_url(metadata.source_url)
+            if metadata.source_hf_repo is not None:
+                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+
+    def add_meta_arch(self, params: Params) -> None:
+        # Metadata About The Neural Architecture Itself
+        self.gguf.add_vocab_size(params.n_vocab)
+        self.gguf.add_context_length(params.n_ctx)
+        self.gguf.add_embedding_length(params.n_embd)
+        self.gguf.add_block_count(params.n_layer)
+        self.gguf.add_feed_forward_length(params.n_ff)
         self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
         self.gguf.add_head_count          (params.n_head)
         self.gguf.add_head_count_kv       (params.n_head_kv)
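The model-name fallback chain in add_meta_model can be read as a small standalone helper; this is only a sketch of the precedence, not code from the commit:

    # Precedence used when choosing the model name (sketch):
    def resolve_model_name(metadata: Metadata | None, params: Params) -> str:
        if metadata is not None and metadata.name is not None:
            return metadata.name                                  # 1. explicit metadata wins
        if params.path_model is not None:
            return str(params.path_model.parent).split("/")[-1]   # 2. model directory name
        if params.n_ctx == 4096:
            return "LLaMA v2"                                     # 3. context-length heuristic
        return "LLaMA"                                            # 4. generic default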
@@ -1179,13 +1236,14 @@ def close(self) -> None:
     @staticmethod
     def write_vocab_only(
         fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
-        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
+        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata | None = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
         of.add_meta_special_vocab(svocab)
@@ -1212,12 +1270,14 @@ def write_all(
         fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
         concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
         pad_vocab: bool = False,
+        metadata: Metadata | None = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         if isinstance(vocab, Vocab):
             of.add_meta_vocab(vocab)
@@ -1253,6 +1313,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
         raise ValueError(f"Unexpected combination of types: {name_to_type}")
 
 
+def model_parameter_count(model: LazyModel) -> int:
+    total_model_parameters = 0
+    for lazy_tensor in model.values():
+        sum_weights_in_tensor = 1
+        for dim in lazy_tensor.shape:
+            sum_weights_in_tensor *= dim
+        total_model_parameters += sum_weights_in_tensor
+    return total_model_parameters
+
+
+def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+    if model_params_count > 1e12:
+        # Trillions Of Parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9:
+        # Billions Of Parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6:
+        # Millions Of Parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands Of Parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+
+    return f"{round(scaled_model_params)}{scale_suffix}"
+
+
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
     return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
             for (name, tensor) in model.items()}
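As a quick sanity check of the two helpers (the parameter counts below are illustrative, chosen to sit near common model sizes):

    # round(6.738...) == 7, round(46.2) == 46, and 355e6 falls in the "M" bucket.
    assert model_parameter_count_rounded_notation(6_738_415_616) == "7B"
    assert model_parameter_count_rounded_notation(46_200_000_000) == "46B"
    assert model_parameter_count_rounded_notation(355_000_000) == "355M"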
@@ -1432,13 +1523,30 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
         return vocab, special_vocab
 
 
-def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
-    namestr = {
-        GGMLFileType.AllF32: "f32",
-        GGMLFileType.MostlyF16: "f16",
-        GGMLFileType.MostlyQ8_0:"q8_0",
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata | None) -> Path:
+    quantization = {
+        GGMLFileType.AllF32: "F32",
+        GGMLFileType.MostlyF16: "F16",
+        GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]
-    ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
+
+    parameters = model_parameter_count_rounded_notation(model_params_count)
+
+    expert_count = ""
+    if params.n_experts is not None:
+        expert_count = f"{params.n_experts}x"
+
+    version = ""
+    if metadata is not None and metadata.version is not None:
+        version = f"-{metadata.version}"
+
+    name = "ggml-model"
+    if metadata is not None and metadata.name is not None:
+        name = metadata.name
+    elif params.path_model is not None:
+        name = params.path_model.name
+
+    ret = model_paths[0].parent / f"{name}{version}-{expert_count}{parameters}-{quantization}.gguf"
     if ret in model_paths:
         logger.error(
             f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1476,6 +1584,7 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
     parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+    parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")
 
     args = parser.parse_args(args_in)
 
@@ -1487,6 +1596,8 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         logging.basicConfig(level=logging.INFO)
 
+    metadata = Metadata.load(args.metadata)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
 
@@ -1500,6 +1611,9 @@ def main(args_in: list[str] | None = None) -> None:
     else:
        model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
 
+    model_params_count = model_parameter_count(model_plus.model)
+    logger.info(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
+
     if args.dump:
         do_dump_model(model_plus)
         return
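Because main() takes an explicit argument list, the new flag is easy to exercise programmatically; the paths below are placeholders:

    # Hypothetical invocation, equivalent to running from the shell:
    #   python convert.py models/my-model --outtype q8_0 --metadata models/my-model/metadata.json
    main(["models/my-model", "--outtype", "q8_0", "--metadata", "models/my-model/metadata.json"])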
@@ -1540,26 +1654,30 @@ def main(args_in: list[str] | None = None) -> None:
             raise ValueError("need --outfile if using --vocab-only")
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
-                                    endianess=endianess, pad_vocab=args.pad_vocab)
+                                    endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
         logger.info(f"Wrote {outfile}")
         return
 
     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab
 
     logger.info(f"Vocab info: {vocab}")
+    special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
+                                      load_merges = True,
+                                      n_vocab = vocab.vocab_size)
+
     logger.info(f"Special vocab info: {special_vocab}")
     model   = model_plus.model
     model   = convert_model_names(model, params, args.skip_unknown)
     ftype   = pick_output_type(model, args.outtype)
     model   = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
 
     params.ftype = ftype
     logger.info(f"Writing {outfile}, format {ftype}")
 
     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
-                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
+                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
     logger.info(f"Wrote {outfile}")