Skip to content

Commit 6cbf9df

Browse files
committed
llama : shorten quantization descriptions
1 parent 7568d1a commit 6cbf9df

File tree

1 file changed

+19
-95
lines changed

1 file changed

+19
-95
lines changed

examples/quantize/quantize.cpp

Lines changed: 19 additions & 95 deletions
Original file line number · Diff line number · Diff line change
@@ -14,103 +14,27 @@ struct quant_option {
1414
};
1515

1616
static const std::vector<struct quant_option> QUANT_OPTIONS = {
17-
{
18-
"Q4_0",
19-
LLAMA_FTYPE_MOSTLY_Q4_0,
20-
" 3.50G, +0.2499 ppl @ 7B - small, very high quality loss - legacy, prefer using Q3_K_M",
21-
},
22-
{
23-
"Q4_1",
24-
LLAMA_FTYPE_MOSTLY_Q4_1,
25-
" 3.90G, +0.1846 ppl @ 7B - small, substantial quality loss - legacy, prefer using Q3_K_L",
26-
},
27-
{
28-
"Q5_0",
29-
LLAMA_FTYPE_MOSTLY_Q5_0,
30-
" 4.30G, +0.0796 ppl @ 7B - medium, balanced quality - legacy, prefer using Q4_K_M",
31-
},
32-
{
33-
"Q5_1",
34-
LLAMA_FTYPE_MOSTLY_Q5_1,
35-
" 4.70G, +0.0415 ppl @ 7B - medium, low quality loss - legacy, prefer using Q5_K_M",
36-
},
17+
{ "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.50G, +0.2499 ppl @ 7B", },
18+
{ "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1846 ppl @ 7B", },
19+
{ "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.30G, +0.0796 ppl @ 7B", },
20+
{ "Q5_1", LLAMA_FTYPE_MOSTLY_Q5_1, " 4.70G, +0.0415 ppl @ 7B", },
3721
#ifdef GGML_USE_K_QUANTS
38-
{
39-
"Q2_K",
40-
LLAMA_FTYPE_MOSTLY_Q2_K,
41-
" 2.67G, +0.8698 ppl @ 7B - smallest, extreme quality loss - not recommended",
42-
},
43-
{
44-
"Q3_K",
45-
LLAMA_FTYPE_MOSTLY_Q3_K_M,
46-
"alias for Q3_K_M"
47-
},
48-
{
49-
"Q3_K_S",
50-
LLAMA_FTYPE_MOSTLY_Q3_K_S,
51-
" 2.75G, +0.5505 ppl @ 7B - very small, very high quality loss",
52-
},
53-
{
54-
"Q3_K_M",
55-
LLAMA_FTYPE_MOSTLY_Q3_K_M,
56-
" 3.06G, +0.2437 ppl @ 7B - very small, very high quality loss",
57-
},
58-
{
59-
"Q3_K_L",
60-
LLAMA_FTYPE_MOSTLY_Q3_K_L,
61-
" 3.35G, +0.1803 ppl @ 7B - small, substantial quality loss",
62-
},
63-
{
64-
"Q4_K",
65-
LLAMA_FTYPE_MOSTLY_Q4_K_M,
66-
"alias for Q4_K_M",
67-
},
68-
{
69-
"Q4_K_S",
70-
LLAMA_FTYPE_MOSTLY_Q4_K_S,
71-
" 3.56G, +0.1149 ppl @ 7B - small, significant quality loss",
72-
},
73-
{
74-
"Q4_K_M",
75-
LLAMA_FTYPE_MOSTLY_Q4_K_M,
76-
" 3.80G, +0.0535 ppl @ 7B - medium, balanced quality - *recommended*",
77-
},
78-
{
79-
"Q5_K",
80-
LLAMA_FTYPE_MOSTLY_Q5_K_M,
81-
"alias for Q5_K_M",
82-
},
83-
{
84-
"Q5_K_S",
85-
LLAMA_FTYPE_MOSTLY_Q5_K_S,
86-
" 4.33G, +0.0353 ppl @ 7B - large, low quality loss - *recommended*",
87-
},
88-
{
89-
"Q5_K_M",
90-
LLAMA_FTYPE_MOSTLY_Q5_K_M,
91-
" 4.45G, +0.0142 ppl @ 7B - large, very low quality loss - *recommended*",
92-
},
93-
{
94-
"Q6_K",
95-
LLAMA_FTYPE_MOSTLY_Q6_K,
96-
" 5.15G, +0.0044 ppl @ 7B - very large, extremely low quality loss",
97-
},
22+
{ "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.67G, +0.8698 ppl @ 7B", },
23+
{ "Q3_K", LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
24+
{ "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G, +0.5505 ppl @ 7B", },
25+
{ "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.06G, +0.2437 ppl @ 7B", },
26+
{ "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1803 ppl @ 7B", },
27+
{ "Q4_K", LLAMA_FTYPE_MOSTLY_Q4_K_M, "alias for Q4_K_M", },
28+
{ "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S, " 3.56G, +0.1149 ppl @ 7B", },
29+
{ "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M, " 3.80G, +0.0535 ppl @ 7B", },
30+
{ "Q5_K", LLAMA_FTYPE_MOSTLY_Q5_K_M, "alias for Q5_K_M", },
31+
{ "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S, " 4.33G, +0.0353 ppl @ 7B", },
32+
{ "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0142 ppl @ 7B", },
33+
{ "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, +0.0044 ppl @ 7B", },
9834
#endif
99-
{
100-
"Q8_0",
101-
LLAMA_FTYPE_MOSTLY_Q8_0,
102-
" 6.70G, +0.0004 ppl @ 7B - very large, extremely low quality loss - not recommended",
103-
},
104-
{
105-
"F16",
106-
LLAMA_FTYPE_MOSTLY_F16,
107-
"13.00G @ 7B - extremely large, virtually no quality loss - not recommended",
108-
},
109-
{
110-
"F32",
111-
LLAMA_FTYPE_ALL_F32,
112-
"26.00G @ 7B - absolutely huge, lossless - not recommended",
113-
},
35+
{ "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ 7B", },
36+
{ "F16", LLAMA_FTYPE_MOSTLY_F16, "13.00G @ 7B", },
37+
{ "F32", LLAMA_FTYPE_ALL_F32, "26.00G @ 7B", },
11438
};
11539

11640

0 commit comments

Comments (0)