@@ -13,7 +13,7 @@
 from enum import IntEnum
 from pathlib import Path
 from hashlib import sha256
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Sequence, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast
 
 import math
 import numpy as np
@@ -677,6 +677,51 @@ def _set_vocab_llama_hf(self):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
+    def _set_vocab_builtin(self, model_name: Literal["gpt-neox", "llama-spm"], vocab_size: int):
+        tokenizer_path = Path(sys.path[0]) / "models" / f"ggml-vocab-{model_name}.gguf"
+        logger.warning(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
+        vocab_reader = gguf.GGUFReader(tokenizer_path, "r")
+
+        default_pre = "mpt" if model_name == "gpt-neox" else "default"
+
+        field = vocab_reader.get_field(gguf.Keys.Tokenizer.MODEL)
+        assert field  # tokenizer model
+        self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]).decode("utf-8"))
+
+        field = vocab_reader.get_field(gguf.Keys.Tokenizer.PRE)
+        self.gguf_writer.add_tokenizer_pre(bytes(field.parts[-1]).decode("utf-8") if field else default_pre)
+
+        field = vocab_reader.get_field(gguf.Keys.Tokenizer.LIST)
+        assert field  # token list
+        self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
+
+        if model_name == "llama-spm":
+            field = vocab_reader.get_field(gguf.Keys.Tokenizer.SCORES)
+            assert field  # token scores
+            self.gguf_writer.add_token_scores([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
+
+        field = vocab_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
+        assert field  # token types
+        self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
+
+        if model_name != "llama-spm":
+            field = vocab_reader.get_field(gguf.Keys.Tokenizer.MERGES)
+            assert field  # token merges
+            self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
+
+        if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)) is not None:
+            self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0])
+        if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)) is not None:
+            self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0])
+        if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)) is not None:
+            self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0])
+        if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.PAD_ID)) is not None:
+            self.gguf_writer.add_pad_token_id(field.parts[-1].tolist()[0])
+        if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.ADD_BOS)) is not None:
+            self.gguf_writer.add_add_bos_token(field.parts[-1].tolist()[0])
+        if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.ADD_EOS)) is not None:
+            self.gguf_writer.add_add_eos_token(field.parts[-1].tolist()[0])
+
 
 @Model.register("GPTNeoXForCausalLM")
 class GPTNeoXModel(Model):
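For reference, the new `_set_vocab_builtin` helper above copies tokenizer metadata out of a prebuilt `ggml-vocab-*.gguf` file. Below is a minimal standalone sketch of the same `GGUFReader` field-decoding pattern; the file path and printed values are illustrative placeholders, and only the reader calls already used in the diff are assumed to exist.

```python
from pathlib import Path

import gguf

# Hypothetical path to one of the vocab files bundled with llama.cpp.
vocab_path = Path("models") / "ggml-vocab-gpt-neox.gguf"
reader = gguf.GGUFReader(vocab_path, "r")

# String-valued fields: the last entry of `parts` holds the raw UTF-8 bytes.
model_field = reader.get_field(gguf.Keys.Tokenizer.MODEL)
assert model_field is not None
print(bytes(model_field.parts[-1]).decode("utf-8"))  # e.g. "gpt2"

# Array-valued fields: `data` holds indices into `parts`, one per element.
list_field = reader.get_field(gguf.Keys.Tokenizer.LIST)
assert list_field is not None
print([bytes(list_field.parts[i]) for i in list_field.data[:5]])  # first few tokens

# Scalar fields (e.g. the BOS token id) are read from the last part.
bos_field = reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
if bos_field is not None:
    print(bos_field.parts[-1].tolist()[0])
```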
@@ -2439,39 +2484,7 @@ def set_vocab(self):
             self._set_vocab_sentencepiece()
         else:
             # Use the GPT-NeoX tokenizer when no tokenizer files are present
-            tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf"
-            logger.warning(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
-            neox_reader = gguf.GGUFReader(tokenizer_path, "r")
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
-            self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]).decode("utf-8") if field else "gpt2")
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.PRE)
-            self.gguf_writer.add_tokenizer_pre(bytes(field.parts[-1]).decode("utf-8") if field else "mpt")
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
-            assert field
-            self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
-            assert field
-            self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.MERGES)
-            assert field
-            self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
-            self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0] if field else 1)
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)
-            self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0] if field else 0)
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)
-            self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0] if field else 0)
-
-            field = neox_reader.get_field(gguf.Keys.Tokenizer.PAD_ID)
-            self.gguf_writer.add_pad_token_id(field.parts[-1].tolist()[0] if field else 0)
+            self._set_vocab_builtin("gpt-neox", vocab_size)
 
     def set_gguf_parameters(self):
         d_model = self.find_hparam(["hidden_size", "d_model"])
@@ -2623,6 +2636,82 @@ def set_vocab(self, *args, **kwargs):
         self.gguf_writer.add_add_eos_token(True)
 
 
+@Model.register("OpenELMForCausalLM")
+class OpenELMModel(Model):
+    model_arch = gguf.MODEL_ARCH.OPENELM
+
+    @staticmethod
+    def _make_divisible(v: float | int, divisor: int) -> int:
+        # ref: https://huggingface.co/apple/OpenELM-270M-Instruct/blob/eb111ff2e6724348e5b905984063d4064d4bc579/configuration_openelm.py#L34-L38
+        new_v = max(divisor, int(v + divisor / 2) // divisor * divisor)
+        # Make sure that round down does not go down by more than 10%.
+        if new_v < 0.9 * v:
+            new_v += divisor
+        return new_v
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        ffn_multipliers: list[float] = self.hparams["ffn_multipliers"]
+        ffn_dim_divisor: int = self.hparams["ffn_dim_divisor"]
+        self._n_embd: int = self.hparams["model_dim"]
+        self._num_kv_heads: list[int] = self.hparams["num_kv_heads"]
+        self._num_query_heads: list[int] = self.hparams["num_query_heads"]
+        self._ffn_dims: list[int] = [
+            OpenELMModel._make_divisible(multiplier * self._n_embd, ffn_dim_divisor)
+            for multiplier in ffn_multipliers
+        ]
+        assert isinstance(self._num_kv_heads, list) and isinstance(self._num_kv_heads[0], int)
+        assert isinstance(self._num_query_heads, list) and isinstance(self._num_query_heads[0], int)
+
+    # Uses the tokenizer from meta-llama/Llama-2-7b-hf
+    def set_vocab(self):
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_builtin("llama-spm", self.hparams["vocab_size"])
+
+    def set_gguf_parameters(self):
+        n_embd = self._n_embd
+        head_dim = self.hparams["head_dim"]
+        rot_pct = 1.0
+        assert self.block_count == len(self._num_kv_heads)
+        assert self.block_count == len(self._num_query_heads)
+        assert self.block_count == len(self._ffn_dims)
+
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_context_length(self.hparams["max_context_length"])
+        self.gguf_writer.add_embedding_length(n_embd)
+        self.gguf_writer.add_feed_forward_length(self._ffn_dims)
+        self.gguf_writer.add_head_count(self._num_query_heads)
+        self.gguf_writer.add_head_count_kv(self._num_kv_heads)
+        self.gguf_writer.add_rope_freq_base(self.hparams["rope_freq_constant"])
+        # https://huggingface.co/apple/OpenELM-270M-Instruct/blob/c401df2/modeling_openelm.py#L30
+        self.gguf_writer.add_layer_norm_rms_eps(1e-6)
+        self.gguf_writer.add_rope_dimension_count(int(rot_pct * head_dim))
+        self.gguf_writer.add_key_length(head_dim)
+        self.gguf_writer.add_value_length(head_dim)
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def find_hparam(self, keys: Iterable[str], optional: bool = False) -> Any:
+        if "n_layers" in keys:
+            return self.hparams["num_transformer_layers"]
+
+        return super().find_hparam(keys, optional)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+
+        # split ff
+        if bid is not None and name == f"transformer.layers.{bid}.ffn.proj_1.weight":
+            ff_dim = self._ffn_dims[bid]
+            yield (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), data_torch[:ff_dim])
+            yield (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), data_torch[ff_dim:])
+            return
+
+        yield (self.map_tensor_name(name), data_torch)
+
+
 @Model.register("ArcticForCausalLM")
 class ArcticModel(Model):
     model_arch = gguf.MODEL_ARCH.ARCTIC
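To make the OpenELM-specific handling above concrete, here is a small sketch of the per-layer FFN-width rounding and of the fused `proj_1` gate/up split performed in `modify_tensors`. The hyperparameter values (model_dim 1280, divisor 256, multiplier 0.5) are illustrative placeholders, not taken from a real config.

```python
import torch


def make_divisible(v: float, divisor: int) -> int:
    # Same rounding rule as OpenELMModel._make_divisible above.
    new_v = max(divisor, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v


# Illustrative hyperparameters (hypothetical, not read from a checkpoint).
model_dim = 1280
ffn_dim_divisor = 256
ffn_multiplier = 0.5

ff_dim = make_divisible(ffn_multiplier * model_dim, ffn_dim_divisor)
print(ff_dim)  # 640 rounds up to 768, a multiple of 256

# The fused proj_1 weight stacks the gate rows on top of the up rows,
# so the converter slices it apart along the first dimension.
proj_1 = torch.randn(2 * ff_dim, model_dim)
ffn_gate, ffn_up = proj_1[:ff_dim], proj_1[ff_dim:]
assert ffn_gate.shape == ffn_up.shape == (ff_dim, model_dim)
```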