Skip to content

Commit c3776ca

Browse files
mofosyne and compilade authored
gguf_dump.py: fix markdown kv array print (#8588)
* gguf_dump.py: fix markdown kv array print
* Update gguf-py/scripts/gguf_dump.py

  Co-authored-by: compilade <git@compilade.net>
* gguf_dump.py: refactor kv array string handling
* gguf_dump.py: escape backticks inside of strings
* gguf_dump.py: inline code markdown escape handler added

  >>> escape_markdown_inline_code("hello world")
  '`hello world`'
  >>> escape_markdown_inline_code("hello ` world")
  '``hello ` world``'
* gguf_dump.py: handle edge case about backticks on start or end of a string

---------

Co-authored-by: compilade <git@compilade.net>
1 parent 87e397d commit c3776ca

File tree

1 file changed

+38
-5
lines changed

1 file changed

+38
-5
lines changed

gguf-py/scripts/gguf_dump.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
import argparse
66
import os
7+
import re
78
import sys
89
from pathlib import Path
910
from typing import Any
@@ -244,26 +245,58 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
244245
else:
245246
pretty_type = str(field.types[-1].name)
246247

248+
def escape_markdown_inline_code(value_string: str) -> str:
    """Wrap *value_string* in a Markdown inline code span.

    The delimiter is one backtick longer than the longest run of backticks
    found in the value, so backticks inside the value cannot terminate the
    span early.

    A single space is added on both sides of the value when a renderer
    would otherwise misread or alter it:

    * the value starts or ends with a backtick (it would merge with the
      delimiter), or
    * the value both starts and ends with a space and is not made only of
      spaces (CommonMark strips one space from each side in that case, so
      the padding is what gets stripped instead of the real content).

    Args:
        value_string: Raw text to display as inline code.

    Returns:
        The text wrapped in a backtick delimiter of sufficient length.

    >>> escape_markdown_inline_code("hello world")
    '`hello world`'
    >>> escape_markdown_inline_code("hello ` world")
    '``hello ` world``'
    """
    # Find the longest contiguous sequence of backticks in the string; the
    # delimiter has to be strictly longer than that to stay unambiguous.
    max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0)
    inline_code_marker = '`' * (max_backticks + 1)

    touches_backtick = value_string.startswith('`') or value_string.endswith('`')
    # An all-space value is left alone: renderers do not strip it.
    space_wrapped = (
        value_string.startswith(' ')
        and value_string.endswith(' ')
        and value_string.strip(' ') != ''
    )
    if touches_backtick or space_wrapped:
        value_string = f" {value_string} "

    return f"{inline_code_marker}{value_string}{inline_code_marker}"
259+
247260
total_elements = len(field.data)
248261
value = ""
249262
if len(field.types) == 1:
250263
curr_type = field.types[0]
251264
if curr_type == GGUFValueType.STRING:
252-
value = repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60])
265+
truncate_length = 60
266+
value_string = str(bytes(field.parts[-1]), encoding='utf-8')
267+
if len(value_string) > truncate_length:
268+
head = escape_markdown_inline_code(value_string[:truncate_length // 2])
269+
tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
270+
value = "{head}...{tail}".format(head=head, tail=tail)
271+
else:
272+
value = escape_markdown_inline_code(value_string)
253273
elif curr_type in reader.gguf_scalar_to_np:
254274
value = str(field.parts[-1][0])
255275
else:
256276
if field.types[0] == GGUFValueType.ARRAY:
257277
curr_type = field.types[1]
278+
array_elements = []
279+
258280
if curr_type == GGUFValueType.STRING:
259281
render_element = min(5, total_elements)
260282
for element_pos in range(render_element):
261-
value += repr(str(bytes(field.parts[-1 - element_pos]), encoding='utf-8')[:5]) + (", " if total_elements > 1 else "")
283+
truncate_length = 30
284+
value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
285+
if len(value_string) > truncate_length:
286+
head = escape_markdown_inline_code(value_string[:truncate_length // 2])
287+
tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
288+
value = "{head}...{tail}".format(head=head, tail=tail)
289+
else:
290+
value = escape_markdown_inline_code(value_string)
291+
array_elements.append(value)
292+
262293
elif curr_type in reader.gguf_scalar_to_np:
263294
render_element = min(7, total_elements)
264295
for element_pos in range(render_element):
265-
value += str(field.parts[-1 - element_pos][0]) + (", " if total_elements > 1 else "")
266-
value = f'[ {value}{" ..." if total_elements > 1 else ""} ]'
296+
array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0]))
297+
298+
value = f'[ {", ".join(array_elements).strip()}{", ..." if total_elements > len(array_elements) else ""} ]'
299+
267300
kv_dump_table.append({"n":n, "pretty_type":pretty_type, "total_elements":total_elements, "field_name":field.name, "value":value})
268301

269302
kv_dump_table_header_map = [
@@ -382,7 +415,7 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None
382415
markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
383416
markdown_content += "\n\n"
384417

385-
print(markdown_content) # noqa: NP100
418+
print(markdown_content) # noqa: NP100
386419

387420

388421
def main() -> None:

0 commit comments

Comments
 (0)