convert : improve model arch handling #13122
@@ -66,8 +66,6 @@ class ModelBase:
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
-    block_count: int
-    tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     gguf_writer: gguf.GGUFWriter
     model_name: str | None
@@ -78,6 +76,10 @@ class ModelBase:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
+    # subclasses should initialize this!
+    block_count: int
+    tensor_map: gguf.TensorNameMap
+
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
@@ -113,8 +115,6 @@ def get_remote_tensors() -> Iterator[tuple[str, Tensor]]: | |
if not self.is_safetensors: | ||
self.part_names = ModelBase.get_model_part_names(self.dir_model, "pytorch_model", ".bin") | ||
self.hparams = ModelBase.load_hparams(self.dir_model) if hparams is None else hparams | ||
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"]) | ||
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) | ||
self.tensor_names = None | ||
self.metadata_override = metadata_override | ||
self.model_name = model_name | ||
|
@@ -418,14 +418,7 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]
     @staticmethod
     def load_hparams(dir_model: Path):
         with open(dir_model / "config.json", "r", encoding="utf-8") as f:
-            hparams = json.load(f)
-            architectures = hparams.get("architectures")
-            if "text_config" in hparams:
-                hparams = {**hparams, **hparams["text_config"]}
-            if architectures is not None:
-                # preserve "architectures" from root level config
-                hparams["architectures"] = architectures
-            return hparams
+            return json.load(f)
 
     @classmethod
     def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
@@ -454,6 +447,16 @@ def from_model_architecture(cls, arch: str, model_type = ModelType.TEXT) -> type
 
 
 class TextModel(ModelBase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        if "text_config" in self.hparams:
+            # move the text_config to the root level
+            self.hparams = {**self.hparams, **self.hparams["text_config"]}
+
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
+        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
+
     @classmethod
     def __init_subclass__(cls):
         # can't use an abstract property, because overriding it without type errors
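To make the hparams handling in `TextModel.__init__` above concrete, here is a toy illustration (the config keys and values are invented, not taken from any real model): keys in `text_config` override root-level keys of the same name, and unrelated root-level keys are preserved.

```python
# Toy example of the {**hparams, **hparams["text_config"]} merge shown above.
# All values here are made up for illustration.
hparams = {
    "model_type": "llava",
    "hidden_size": 1024,  # root-level value, e.g. belonging to the vision tower
    "text_config": {"hidden_size": 4096, "num_hidden_layers": 32},
}

merged = {**hparams, **hparams["text_config"]}
assert merged["hidden_size"] == 4096       # text_config wins on conflicting keys
assert merged["num_hidden_layers"] == 32   # promoted to the root level
assert merged["model_type"] == "llava"     # untouched root-level key is kept
```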
@@ -1078,8 +1081,12 @@ def __init__(self, *args, **kwargs):
             raise TypeError("VisionModel must be subclassed with model_arch = gguf.MODEL_ARCH.CLIP_VISION")
 
-        # small hack to correct the number of layers
-        self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, 128)
-        self.n_embd_text = self.find_hparam(["hidden_size", "n_embd"])
+        self.block_count = 512 # vision models are small, this "ought to be enough for anybody"
Review thread on this line:

> "small" with up to 512 layers :) I think this number could be taken from the config, since … What model doesn't specify how many layers the vision part has?

> I've never seen any model with more than 64 layers, but I'm just putting this number here to be future-proof. We could get this number from the config, but the problem is that many … The frustrating thing is that this starts to happen on some … One way to fix this could be to use …

> That could be the way to go, since … I guess if this is a problem (e.g. for very new architectures), it could always be possible to temporarily define a … But! Something which seems important is that … I guess it's probably fine to keep a high block count, but it makes the tensor map dict bigger than it needs to be.

> It should not change much, … The simple plan is to replace …

> Here is what I mean: 4840d2f. It does work better now, …
+        self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, self.block_count)
+
+        # get n_embd of the text model
+        text_config = {**self.hparams, **self.hparams["text_config"]}
+        self.n_embd_text = text_config.get("hidden_size", text_config.get("n_embd", 0))
+        assert self.n_embd_text > 0, "n_embd not found in hparams"
 
         if "vision_config" not in self.hparams:
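The review thread above discusses reading the vision block count from the config instead of hard-coding 512. Below is a minimal sketch of that alternative, not what this commit does; the key name `num_hidden_layers` and the 512 fallback are assumptions, since many vision sub-configs omit or rename this field.

```python
# Sketch only (assumed key name and fallback): prefer the layer count declared
# in vision_config, fall back to a generous upper bound when it is missing.
from typing import Any


def vision_block_count(hparams: dict[str, Any], fallback: int = 512) -> int:
    vision_config = hparams.get("vision_config", {})
    return vision_config.get("num_hidden_layers", fallback)


# e.g. in VisionModel.__init__:
#   self.block_count = vision_block_count(self.hparams)
#   self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, self.block_count)
```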
@@ -1726,20 +1733,20 @@ def prepare_tensors(self):
     "LlamaForCausalLM",
     "MistralForCausalLM",
     "MixtralForCausalLM",
-    "Idefics3ForConditionalGeneration",
-    "SmolVLMForConditionalGeneration",
+    "VLlama3ForCausalLM",
     "LlavaForConditionalGeneration")
 class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
     undo_permute = True
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        arch = get_model_architecture(self.dir_model, ModelType.TEXT, self.hparams)
         # fix for SmolVLM2, missing `num_attention_heads` in config.json
-        if self.hparams["architectures"][0] == "SmolVLMForConditionalGeneration":
+        if arch == "VLlama3ForCausalLM":
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
         # fix for Pixtral, missing `num_attention_heads` in config.json
-        if self.hparams["architectures"][0] == "LlavaForConditionalGeneration" \
+        if arch == "LlavaForConditionalGeneration" \
                 and self.hparams.get("model_type") == "mistral":
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
@@ -5805,6 +5812,19 @@ def split_str_to_n_bytes(split_str: str) -> int:
     return n
 
 
+def get_model_architecture(dir_model: Path, model_type: ModelType, hparams: Any = None) -> str:
+    hparams = ModelBase.load_hparams(dir_model) if hparams is None else hparams
+    text_config = hparams.get("text_config", {})
+    vision_config = hparams.get("vision_config", {})
+    arch = hparams["architectures"][0]
+    # if "architectures" is found in the sub-config, use that instead
+    if model_type == ModelType.TEXT and text_config.get("architectures") is not None:
+        arch = text_config["architectures"][0]
+    elif model_type == ModelType.VISION and vision_config.get("architectures") is not None:
+        arch = vision_config["architectures"][0]
+    return arch
Comment on lines +5856 to +5866:

> @compilade In this case, maybe we can patch … Maybe something like:
>
> ```python
> if "ssm_cfg" in hparams and hparams.get("ssm_cfg").get("layer") == "Mamba":
>     return "MambaForCausalLM"
> ```
>
> And since …

> @ngxson I've tried this, and for some reason it doesn't work; it seems like AutoConfig doesn't fail when there is no … The error I'm getting suggests … It's as if AutoConfig can still detect it's Mamba2, but doesn't use the correct values from the config. Here's what I've tried:
>
> ```diff
> diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
> index 532cc879d..4a3713e4d 100755
> --- a/convert_hf_to_gguf.py
> +++ b/convert_hf_to_gguf.py
> @@ -5968,12 +5968,21 @@ def get_model_architecture(dir_model: Path, model_type: ModelType, hparams: Any
>      hparams = ModelBase.load_hparams(dir_model) if hparams is None else hparams
>      text_config = hparams.get("text_config", {})
>      vision_config = hparams.get("vision_config", {})
> -    arch = hparams["architectures"][0]
> +    arch = None
> +    if (arches := hparams.get("architectures")) is not None and len(arches) > 0:
> +        arch = arches[0]
> +    elif "ssm_cfg" in hparams:
> +        # TODO: more general extra mappings
> +        ssm_mapping = {"Mamba": "MambaForCausalLM", "Mamba2": "Mamba2ForCausalLM"}
> +        arch = ssm_mapping.get(hparams["ssm_cfg"].get("layer", "Mamba"), None)
> +
>      # if "architectures" is found in the sub-config, use that instead
>      if model_type == ModelType.TEXT and text_config.get("architectures") is not None:
>          arch = text_config["architectures"][0]
>      elif model_type == ModelType.VISION and vision_config.get("architectures") is not None:
>          arch = vision_config["architectures"][0]
> +    if arch is None:
> +        raise ValueError("Failed to detect model architecture")
>      return arch
> ```
>
> And then converting https://huggingface.co/state-spaces/mamba2-370m. (when using #9126)

> Hmm, this is quite magic tbh, I have no idea how AutoConfig works under the hood. Another solution though: we can add an argument in …
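For reference, a small usage sketch of the new `get_model_architecture` helper, assuming the converter's own imports (`Path`, `ModelType`) are in scope. The config is invented and passed via the `hparams` argument, so no files are read; it shows how a sub-config's `architectures` entry takes precedence over the root-level one.

```python
# Invented config exercising the selection logic of get_model_architecture.
toy_hparams = {
    "architectures": ["LlavaForConditionalGeneration"],
    "text_config": {"architectures": ["MistralForCausalLM"]},
    "vision_config": {},  # no "architectures" here, so the root entry is used
}

# Passing hparams directly skips ModelBase.load_hparams(dir_model).
print(get_model_architecture(Path("."), ModelType.TEXT, toy_hparams))    # MistralForCausalLM
print(get_model_architecture(Path("."), ModelType.VISION, toy_hparams))  # LlavaForConditionalGeneration
```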
+
+
 def main() -> None:
     args = parse_args()

@@ -5857,16 +5877,15 @@ def main() -> None:
 
     logger.info(f"Loading model: {dir_model.name}")
 
-    hparams = ModelBase.load_hparams(dir_model)
-
     if args.mmproj:
         if "mmproj" not in fname_out.name:
             fname_out = ModelBase.add_prefix_to_filename(fname_out, "mmproj-")
 
     with torch.inference_mode():
         output_type = ftype_map[args.outtype]
-        model_architecture = hparams["architectures"][0]
+        model_type = ModelType.VISION if args.mmproj else ModelType.TEXT
+        model_architecture = get_model_architecture(dir_model, model_type)
         logger.info(f"Model architecture: {model_architecture}")
         try:
             model_class = ModelBase.from_model_architecture(model_architecture, model_type=model_type)
         except NotImplementedError:
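As a side note on the mmproj output naming in `main()` above, here is a toy illustration of the intended effect; the exact behavior of `ModelBase.add_prefix_to_filename` is inferred from its name and usage, so treat the helper below as an assumption.

```python
# Assumed behavior: prepend the prefix to the file name, keeping the directory.
from pathlib import Path


def add_prefix_to_filename(path: Path, prefix: str) -> Path:
    return path.with_name(prefix + path.name)


fname_out = Path("models/gemma-vision-f16.gguf")  # made-up output path
if "mmproj" not in fname_out.name:
    fname_out = add_prefix_to_filename(fname_out, "mmproj-")
print(fname_out)  # models/mmproj-gemma-vision-f16.gguf
```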