Skip to content

Commit 1f945ab

Browse files
Artyom17 authored and Chillee committed
Adding Mistral-7B support (#116)
1 parent 88873a6 commit 1f945ab

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ meta-llama/Llama-2-13b-chat-hf
5959
meta-llama/Llama-2-70b-chat-hf
6060
codellama/CodeLlama-7b-Python-hf
6161
codellama/CodeLlama-34b-Python-hf
62+
mistralai/Mistral-7B-v0.1
63+
mistralai/Mistral-7B-Instruct-v0.1
64+
mistralai/Mistral-7B-Instruct-v0.2
6265
```
6366

6467
For example, to convert Llama-2-7b-chat-hf

model.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,13 @@ def from_name(cls, name: str):
4545
return cls(**transformer_configs[name])
4646
# fuzzy search
4747
config = [config for config in transformer_configs if config in str(name).upper() or config in str(name)]
48-
assert len(config) == 1, name
48+
49+
# We may have two or more configs matched (e.g. "7B" and "Mistral-7B"). Find the best config match,
50+
# take the longer name (as it has more symbols matched)
51+
if len(config) > 1:
52+
config.sort(key=len, reverse=True)
53+
assert len(config[0]) != len(config[1]), name # make sure only one 'best' match
54+
4955
return cls(**transformer_configs[config[0]])
5056

5157

@@ -56,6 +62,7 @@ def from_name(cls, name: str):
5662
"30B": dict(n_layer=60, n_head=52, dim=6656),
5763
"34B": dict(n_layer=48, n_head=64, dim=8192, vocab_size=32000, n_local_heads=8, intermediate_size=22016, rope_base=1000000), # CodeLlama-34B-Python-hf
5864
"70B": dict(n_layer=80, n_head=64, dim=8192, n_local_heads=8, intermediate_size=28672),
65+
"Mistral-7B": dict(n_layer=32, n_head=32, n_local_heads=8, dim=4096, intermediate_size=14336, vocab_size=32000),
5966
}
6067

6168
class KVCache(nn.Module):

0 commit comments

Comments
 (0)