
Commit 36bff51

fix tokenizer.json tokenizer_config.json cpu()
1 parent 6c1c4b4 commit 36bff51

1 file changed: +12 −3 lines changed


examples/minicpmv/minicpm-surgery.py

Lines changed: 12 additions & 3 deletions
@@ -1,6 +1,6 @@
 import argparse
 import glob
-import os
+import os, json
 import torch
 from transformers import AutoModel, AutoTokenizer

@@ -16,12 +16,12 @@
 mm_tensors = [k for k, v in checkpoint.items() if k.startswith("resampler")]

 # store these tensors in a new dictionary and torch.save them
-projector = {name: checkpoint[name].float() for name in mm_tensors}
+projector = {name: checkpoint[name].float().cpu() for name in mm_tensors}
 torch.save(projector, f"{args.model}/llava.projector")

 clip_tensors = [k for k, v in checkpoint.items() if k.startswith("vpm")]
 if len(clip_tensors) > 0:
-    clip = {name.replace("vpm.", ""): checkpoint[name].float() for name in clip_tensors}
+    clip = {name.replace("vpm.", ""): checkpoint[name].float().cpu() for name in clip_tensors}
     torch.save(clip, f"{args.model}/llava.clip")

 # added tokens should be removed to be able to convert Mistral models
@@ -42,6 +42,15 @@
 tok = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
 tok.save_pretrained(f"{args.model}/MiniCPM")
 os.system(f"cp {args.model}/modeling_minicpm.py {args.model}/MiniCPM/modeling_minicpm.py")
+os.system(f"cp {args.model}/tokenizer.json {args.model}/MiniCPM/tokenizer.json")
+with open(f"{args.model}/MiniCPM/tokenizer_config.json", "r") as f:
+    d = json.load(f)
+d.pop("auto_map")
+d["tokenizer_class"] = "LlamaTokenizer"
+d.pop("add_prefix_space")
+with open(f"{args.model}/MiniCPM/tokenizer_config.json", "w") as f:
+    json.dump(d, f, indent=2)
+

 print("Done!")
 print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
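
Why the added .cpu() calls matter (not part of the commit; a minimal illustrative sketch): torch.save records each tensor's device, so a projector saved from CUDA tensors would need map_location to be loaded on a CPU-only machine. Moving the tensors to CPU first keeps llava.projector and llava.clip portable. The file name below is an arbitrary example.

import torch

# Stand-in tensor; in the surgery script this would be a checkpoint entry.
t = torch.zeros(4)
if torch.cuda.is_available():
    t = t.cuda()

# Saving the CPU copy keeps the file loadable without a GPU or map_location.
torch.save({"weight": t.float().cpu()}, "example.projector")
state = torch.load("example.projector")
print(state["weight"].device)  # cpu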
