From 6568836659bab974b53d4db28c6d9058a84329af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Romain=20=E2=80=9CArtefact2=E2=80=9D=20Dal=20Maso?=
Date: Sat, 16 Mar 2024 21:36:06 +0100
Subject: [PATCH] convert : use f32 outtype for bf16 tensors

The old behaviour is to use f16, but bf16 to f16 is not a lossless
conversion. Change the outtype to f32 to default to a lossless
conversion.
---
 convert.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert.py b/convert.py
index 161430f3e717e..817cb66123a8f 100755
--- a/convert.py
+++ b/convert.py
@@ -1167,9 +1167,9 @@ def write_all(
 def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
     wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) + ".weight"].data_type
 
-    if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
+    if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)):
         return GGMLFileType.AllF32
-    if output_type_str == "f16" or (output_type_str is None and wq_type in (DT_F16, DT_BF16)):
+    if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16):
         return GGMLFileType.MostlyF16
     if output_type_str == "q8_0":
         return GGMLFileType.MostlyQ8_0