@@ -14,103 +14,27 @@ struct quant_option {
14
14
};
15
15
16
16
static const std::vector<struct quant_option > QUANT_OPTIONS = {
17
- {
18
- " Q4_0" ,
19
- LLAMA_FTYPE_MOSTLY_Q4_0,
20
- " 3.50G, +0.2499 ppl @ 7B - small, very high quality loss - legacy, prefer using Q3_K_M" ,
21
- },
22
- {
23
- " Q4_1" ,
24
- LLAMA_FTYPE_MOSTLY_Q4_1,
25
- " 3.90G, +0.1846 ppl @ 7B - small, substantial quality loss - legacy, prefer using Q3_K_L" ,
26
- },
27
- {
28
- " Q5_0" ,
29
- LLAMA_FTYPE_MOSTLY_Q5_0,
30
- " 4.30G, +0.0796 ppl @ 7B - medium, balanced quality - legacy, prefer using Q4_K_M" ,
31
- },
32
- {
33
- " Q5_1" ,
34
- LLAMA_FTYPE_MOSTLY_Q5_1,
35
- " 4.70G, +0.0415 ppl @ 7B - medium, low quality loss - legacy, prefer using Q5_K_M" ,
36
- },
17
+ { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 3.50G, +0.2499 ppl @ 7B" , },
18
+ { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1846 ppl @ 7B" , },
19
+ { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 4.30G, +0.0796 ppl @ 7B" , },
20
+ { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 4.70G, +0.0415 ppl @ 7B" , },
37
21
#ifdef GGML_USE_K_QUANTS
38
- {
39
- " Q2_K" ,
40
- LLAMA_FTYPE_MOSTLY_Q2_K,
41
- " 2.67G, +0.8698 ppl @ 7B - smallest, extreme quality loss - not recommended" ,
42
- },
43
- {
44
- " Q3_K" ,
45
- LLAMA_FTYPE_MOSTLY_Q3_K_M,
46
- " alias for Q3_K_M"
47
- },
48
- {
49
- " Q3_K_S" ,
50
- LLAMA_FTYPE_MOSTLY_Q3_K_S,
51
- " 2.75G, +0.5505 ppl @ 7B - very small, very high quality loss" ,
52
- },
53
- {
54
- " Q3_K_M" ,
55
- LLAMA_FTYPE_MOSTLY_Q3_K_M,
56
- " 3.06G, +0.2437 ppl @ 7B - very small, very high quality loss" ,
57
- },
58
- {
59
- " Q3_K_L" ,
60
- LLAMA_FTYPE_MOSTLY_Q3_K_L,
61
- " 3.35G, +0.1803 ppl @ 7B - small, substantial quality loss" ,
62
- },
63
- {
64
- " Q4_K" ,
65
- LLAMA_FTYPE_MOSTLY_Q4_K_M,
66
- " alias for Q4_K_M" ,
67
- },
68
- {
69
- " Q4_K_S" ,
70
- LLAMA_FTYPE_MOSTLY_Q4_K_S,
71
- " 3.56G, +0.1149 ppl @ 7B - small, significant quality loss" ,
72
- },
73
- {
74
- " Q4_K_M" ,
75
- LLAMA_FTYPE_MOSTLY_Q4_K_M,
76
- " 3.80G, +0.0535 ppl @ 7B - medium, balanced quality - *recommended*" ,
77
- },
78
- {
79
- " Q5_K" ,
80
- LLAMA_FTYPE_MOSTLY_Q5_K_M,
81
- " alias for Q5_K_M" ,
82
- },
83
- {
84
- " Q5_K_S" ,
85
- LLAMA_FTYPE_MOSTLY_Q5_K_S,
86
- " 4.33G, +0.0353 ppl @ 7B - large, low quality loss - *recommended*" ,
87
- },
88
- {
89
- " Q5_K_M" ,
90
- LLAMA_FTYPE_MOSTLY_Q5_K_M,
91
- " 4.45G, +0.0142 ppl @ 7B - large, very low quality loss - *recommended*" ,
92
- },
93
- {
94
- " Q6_K" ,
95
- LLAMA_FTYPE_MOSTLY_Q6_K,
96
- " 5.15G, +0.0044 ppl @ 7B - very large, extremely low quality loss" ,
97
- },
22
+ { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.67G, +0.8698 ppl @ 7B" , },
23
+ { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
24
+ { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G, +0.5505 ppl @ 7B" , },
25
+ { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.06G, +0.2437 ppl @ 7B" , },
26
+ { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1803 ppl @ 7B" , },
27
+ { " Q4_K" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " alias for Q4_K_M" , },
28
+ { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S, " 3.56G, +0.1149 ppl @ 7B" , },
29
+ { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " 3.80G, +0.0535 ppl @ 7B" , },
30
+ { " Q5_K" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " alias for Q5_K_M" , },
31
+ { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S, " 4.33G, +0.0353 ppl @ 7B" , },
32
+ { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0142 ppl @ 7B" , },
33
+ { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, +0.0044 ppl @ 7B" , },
98
34
#endif
99
- {
100
- " Q8_0" ,
101
- LLAMA_FTYPE_MOSTLY_Q8_0,
102
- " 6.70G, +0.0004 ppl @ 7B - very large, extremely low quality loss - not recommended" ,
103
- },
104
- {
105
- " F16" ,
106
- LLAMA_FTYPE_MOSTLY_F16,
107
- " 13.00G @ 7B - extremely large, virtually no quality loss - not recommended" ,
108
- },
109
- {
110
- " F32" ,
111
- LLAMA_FTYPE_ALL_F32,
112
- " 26.00G @ 7B - absolutely huge, lossless - not recommended" ,
113
- },
35
+ { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ 7B" , },
36
+ { " F16" , LLAMA_FTYPE_MOSTLY_F16, " 13.00G @ 7B" , },
37
+ { " F32" , LLAMA_FTYPE_ALL_F32, " 26.00G @ 7B" , },
114
38
};
115
39
116
40
0 commit comments