From e39e0b86e69f42142e67efa9b495c2f70630ef19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Tue, 25 Mar 2025 15:49:31 +0100 Subject: [PATCH 1/3] Fix Mistral3/Gemma3 model hparams init --- convert_hf_to_gguf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index d9fa57027b771..1f2dcc5da9f7d 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1752,7 +1752,7 @@ class Mistral3Model(LlamaModel): # we need to merge the text_config into the root level of hparams def __init__(self, *args, **kwargs): - hparams = Model.load_hparams(kwargs["dir_model"]) + hparams = Model.load_hparams(args[0]) if "text_config" in hparams: hparams = {**hparams, **hparams["text_config"]} kwargs["hparams"] = hparams @@ -3385,7 +3385,7 @@ class Gemma3Model(Model): # we need to merge the text_config into the root level of hparams def __init__(self, *args, **kwargs): - hparams = Model.load_hparams(kwargs["dir_model"]) + hparams = Model.load_hparams(args[0]) if "text_config" in hparams: hparams = {**hparams, **hparams["text_config"]} kwargs["hparams"] = hparams From a3e791f405bdf8d516a09dd33f1c744feef1d4a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Tue, 25 Mar 2025 16:05:28 +0100 Subject: [PATCH 2/3] set positional args correctly --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 1f2dcc5da9f7d..b752c6c77a152 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -5358,7 +5358,7 @@ def main() -> None: logger.error(f"Model {model_architecture} is not supported") sys.exit(1) - model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out, + model_instance = model_class(dir_model, output_type, fname_out, is_big_endian=args.bigendian, use_temp_file=args.use_temp_file, eager=args.no_lazy, metadata_override=args.metadata, model_name=args.model_name, From 0682b4639ff8e25c7e7f607e3ea3dc9110775abd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Tue, 25 Mar 2025 22:46:05 +0100 Subject: [PATCH 3/3] use existing hparams if passed --- convert_hf_to_gguf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index b752c6c77a152..76ab4233ef2c1 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1752,7 +1752,7 @@ class Mistral3Model(LlamaModel): # we need to merge the text_config into the root level of hparams def __init__(self, *args, **kwargs): - hparams = Model.load_hparams(args[0]) + hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0]) if "text_config" in hparams: hparams = {**hparams, **hparams["text_config"]} kwargs["hparams"] = hparams @@ -3385,7 +3385,7 @@ class Gemma3Model(Model): # we need to merge the text_config into the root level of hparams def __init__(self, *args, **kwargs): - hparams = Model.load_hparams(args[0]) + hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0]) if "text_config" in hparams: hparams = {**hparams, **hparams["text_config"]} kwargs["hparams"] = hparams