Skip to content

Support converting models with multiple chat templates #6588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gguf-py/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ pip install gguf

[scripts/gguf-convert-endian.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-convert-endian.py) — Allows converting the endianness of GGUF files.

[scripts/gguf-new-metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/scripts/gguf-new-metadata.py) — Copies a GGUF file with added/modified/removed metadata values.

## Development
Maintainers who participate in development of this package are advised to install it in editable mode:

Expand Down
2 changes: 2 additions & 0 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class Tokenizer:
HF_JSON = "tokenizer.huggingface.json"
RWKV = "tokenizer.rwkv.world"
CHAT_TEMPLATE = "tokenizer.chat_template"
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
CHAT_TEMPLATES = "tokenizer.chat_templates"
# FIM/Infill special tokens constants
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
Expand Down
31 changes: 29 additions & 2 deletions gguf-py/gguf/gguf_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import tempfile
from enum import Enum, auto
from io import BufferedWriter
from typing import IO, Any, Sequence
from typing import IO, Any, Sequence, Mapping
from string import ascii_letters, digits

import numpy as np

Expand Down Expand Up @@ -466,7 +467,33 @@ def add_add_eos_token(self, value: bool) -> None:
def add_add_space_prefix(self, value: bool) -> None:
self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)

def add_chat_template(self, value: str) -> None:
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
if isinstance(value, list):
template_default = None
template_names = set()

for choice in value:
name = choice.get('name', '')
template = choice.get('template')

# Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
name = ''.join((c if c in ascii_letters + digits else '_' for c in name))

if name and template is not None:
if name == 'default':
template_default = template
else:
template_names.add(name)
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template)

if template_names:
self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names))

if template_default is None:
return

value = template_default

self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)

def add_prefix_token_id(self, id: int) -> None:
Expand Down
2 changes: 1 addition & 1 deletion gguf-py/gguf/vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
with open(tokenizer_config_file, encoding = 'utf-8') as f:
tokenizer_config = json.load(f)
chat_template = tokenizer_config.get('chat_template')
if chat_template is None or isinstance(chat_template, str):
if chat_template is None or isinstance(chat_template, (str, list)):
self.chat_template = chat_template
else:
print(
Expand Down
1 change: 1 addition & 0 deletions gguf-py/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ build-backend = "poetry.core.masonry.api"
gguf-convert-endian = "scripts:gguf_convert_endian_entrypoint"
gguf-dump = "scripts:gguf_dump_entrypoint"
gguf-set-metadata = "scripts:gguf_set_metadata_entrypoint"
gguf-new-metadata = "scripts:gguf_new_metadata_entrypoint"
1 change: 1 addition & 0 deletions gguf-py/scripts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
gguf_convert_endian_entrypoint = import_module("scripts.gguf-convert-endian").main
gguf_dump_entrypoint = import_module("scripts.gguf-dump").main
gguf_set_metadata_entrypoint = import_module("scripts.gguf-set-metadata").main
gguf_new_metadata_entrypoint = import_module("scripts.gguf-new-metadata").main

del import_module, os
190 changes: 190 additions & 0 deletions gguf-py/scripts/gguf-new-metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
#!/usr/bin/env python3
import logging
import argparse
import os
import sys
import json
from pathlib import Path

import numpy as np
from typing import Any, Mapping, Sequence

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))

import gguf

logger = logging.getLogger("gguf-new-metadata")


def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
# Host is little endian
host_endian = gguf.GGUFEndian.LITTLE
swapped_endian = gguf.GGUFEndian.BIG
else:
# Sorry PDP or other weird systems that don't use BE or LE.
host_endian = gguf.GGUFEndian.BIG
swapped_endian = gguf.GGUFEndian.LITTLE

if reader.byte_order == "S":
return swapped_endian
else:
return host_endian


def decode_field(field: gguf.ReaderField) -> Any:
if field and field.types:
main_type = field.types[0]

if main_type == gguf.GGUFValueType.ARRAY:
sub_type = field.types[-1]

if sub_type == gguf.GGUFValueType.STRING:
return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
else:
return [pv for idx in field.data for pv in field.parts[idx].tolist()]
if main_type == gguf.GGUFValueType.STRING:
return str(bytes(field.parts[-1]), encoding='utf8')
else:
return field.parts[-1][0]

return None


def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
field = reader.get_field(key)

return decode_field(field)


def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str], remove_metadata: Sequence[str]) -> None:
for field in reader.fields.values():
# Suppress virtual fields and fields written by GGUFWriter
if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
logger.debug(f'Suppressing {field.name}')
continue

# Skip old chat templates if we have new ones
if field.name.startswith(gguf.Keys.Tokenizer.CHAT_TEMPLATE) and gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
logger.debug(f'Skipping {field.name}')
continue

if field.name in remove_metadata:
logger.debug(f'Removing {field.name}')
continue

old_val = decode_field(field)
val = new_metadata.get(field.name, old_val)

if field.name in new_metadata:
logger.debug(f'Modifying {field.name}: "{old_val}" -> "{val}"')
del new_metadata[field.name]
elif val is not None:
logger.debug(f'Copying {field.name}')

if val is not None:
writer.add_key(field.name)
writer.add_val(val, field.types[0])

if gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
logger.debug('Adding chat template(s)')
writer.add_chat_template(new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE])
del new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE]

# TODO: Support other types than string?
for key, val in new_metadata.items():
logger.debug(f'Adding {key}: {val}')
writer.add_key(key)
writer.add_val(val, gguf.GGUFValueType.STRING)

for tensor in reader.tensors:
# Dimensions are written in reverse order, so flip them first
shape = np.flipud(tensor.shape)
writer.add_tensor_info(tensor.name, shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_ti_data_to_file()

for tensor in reader.tensors:
writer.write_tensor_data(tensor.data)

writer.close()


def main() -> None:
parser = argparse.ArgumentParser(description="Make a copy of a GGUF file with new metadata")
parser.add_argument("input", type=Path, help="GGUF format model input filename")
parser.add_argument("output", type=Path, help="GGUF format model output filename")
parser.add_argument("--general-name", type=str, help="The models general.name")
parser.add_argument("--general-description", type=str, help="The models general.description")
parser.add_argument("--chat-template", type=str, help="Chat template string (or JSON string containing templates)")
parser.add_argument("--chat-template-config", type=Path, help="Config file (tokenizer_config.json) containing chat template(s)")
parser.add_argument("--remove-metadata", action="append", type=str, help="Remove metadata (by key name) from output model")
parser.add_argument("--force", action="store_true", help="Bypass warnings without confirmation")
parser.add_argument("--verbose", action="store_true", help="Increase output verbosity")
args = parser.parse_args(None if len(sys.argv) > 2 else ["--help"])

logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

new_metadata = {}
remove_metadata = args.remove_metadata or []

if args.general_name:
new_metadata[gguf.Keys.General.NAME] = args.general_name

if args.general_description:
new_metadata[gguf.Keys.General.DESCRIPTION] = args.general_description

if args.chat_template:
new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = json.loads(args.chat_template) if args.chat_template.startswith('[') else args.chat_template

if args.chat_template_config:
with open(args.chat_template_config, 'r') as fp:
config = json.load(fp)
template = config.get('chat_template')
if template:
new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = template

if remove_metadata:
logger.warning('*** Warning *** Warning *** Warning **')
logger.warning('* Most metadata is required for a fully functional GGUF file,')
logger.warning('* removing crucial metadata may result in a corrupt output file!')

if not args.force:
logger.warning('* Enter exactly YES if you are positive you want to proceed:')
response = input('YES, I am sure> ')
if response != 'YES':
logger.info("You didn't enter YES. Okay then, see ya!")
sys.exit(0)

logger.info(f'* Loading: {args.input}')
reader = gguf.GGUFReader(args.input, 'r')

arch = get_field_data(reader, gguf.Keys.General.ARCHITECTURE)
endianess = get_byteorder(reader)

if os.path.isfile(args.output) and not args.force:
logger.warning('*** Warning *** Warning *** Warning **')
logger.warning(f'* The "{args.output}" GGUF file already exists, it will be overwritten!')
logger.warning('* Enter exactly YES if you are positive you want to proceed:')
response = input('YES, I am sure> ')
if response != 'YES':
logger.info("You didn't enter YES. Okay then, see ya!")
sys.exit(0)

logger.info(f'* Writing: {args.output}')
writer = gguf.GGUFWriter(args.output, arch=arch, endianess=endianess)

alignment = get_field_data(reader, gguf.Keys.General.ALIGNMENT)
if alignment is not None:
logger.debug(f'Setting custom alignment: {alignment}')
writer.data_alignment = alignment

copy_with_new_metadata(reader, writer, new_metadata, remove_metadata)


if __name__ == '__main__':
main()