convert-llama-h5-to-gguf.py : clarify the reverse permute

klosax · web-flow · commit ea5615a03a0e · 2023-08-16T11:23:15.000+02:00
diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-h5-to-gguf.py
@@ -18,7 +18,9 @@
 # compatible with python < 3.9
 NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
 
-def permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
+# undo the permutation that HF's conversion script applies to the q/k projection weights, restoring the original pth layout
+# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py
+def reverse_hf_permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
     if n_kv_head is not None and n_head != n_kv_head: n_head //= n_kv_head
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                 .swapaxes(1, 2)
@@ -219,9 +221,9 @@ def count_model_parts(dir_model: str) -> int:
 
         data = data.squeeze().numpy()
 
-        # permute these
+        # undo the HF permutation on the q/k projection weights
         if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
-            data = permute(data, head_count, head_count_kv)
+            data = reverse_hf_permute(data, head_count, head_count_kv)
 
         # map tensor names
         if name.endswith(".weight") and name[:-7] in tensor_map:
@@ -288,9 +290,9 @@ def count_model_parts(dir_model: str) -> int:
 
         data = data.squeeze().numpy()
 
-        # permute these
+        # undo the HF permutation on the q/k projection weights
         if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
-            data = permute(data, head_count, head_count_kv)
+            data = reverse_hf_permute(data, head_count, head_count_kv)
 
         # map tensor names
         if name.endswith(".weight") and name[:-7] in tensor_map: