#include "ggml.h"
- #include "gguf-util.h"
#include "gguf-llama.h"

#include <cstdio>
@@ -21,133 +20,22 @@ static std::string to_string(const T & val) {
    return ss.str();
}

- void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
-     const int32_t n = val.size();
-     fout.write((const char *) &n, sizeof(n));
-     fout.write(val.c_str(), n);
- }
-
- void gguf_ex_write_i32(std::ofstream & fout, int32_t val) {
-     fout.write((const char *) &val, sizeof(val));
- }
-
- void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
-     fout.write((const char *) &val, sizeof(val));
- }
-
- template<typename T>
- void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
-     gguf_ex_write_str(fout, key);
-     fout.write((const char *) &type, sizeof(type));
-     fout.write((const char *) &val, sizeof(val));
-
-     fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), to_string(val).c_str());
- }
-
- template<>
- void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
-     gguf_ex_write_str(fout, key);
-     fout.write((const char *) &type, sizeof(type));
-
-     const int32_t n = val.size();
-     fout.write((const char *) &n, sizeof(n));
-     fout.write(val.c_str(), n);
-
-     fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
- }
-
- template<typename T>
- void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
-     gguf_ex_write_str(fout, key);
-     {
-         const enum gguf_type tarr = GGUF_TYPE_ARRAY;
-         fout.write((const char *) &tarr, sizeof(tarr));
-     }
-
-     const int32_t n = val.size();
-     fout.write((const char *) &type, sizeof(type));
-     fout.write((const char *) &n, sizeof(n));
-     fout.write((const char *) val.data(), n * sizeof(T));
-
-     fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
-     for (int i = 0; i < n; ++i) {
-         fprintf(stdout, "%s", to_string(val[i]).c_str());
-         if (i < n - 1) {
-             fprintf(stdout, ", ");
-         }
-     }
-     fprintf(stdout, "]\n");
- }
-
- template<>
- void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
-     gguf_ex_write_str(fout, key);
-     {
-         const enum gguf_type tarr = GGUF_TYPE_ARRAY;
-         fout.write((const char *) &tarr, sizeof(tarr));
-     }
-
-     const int32_t n = val.size();
-     fout.write((const char *) &type, sizeof(type));
-     fout.write((const char *) &n, sizeof(n));
-     for (int i = 0; i < n; ++i) {
-         const int32_t nstr = val[i].size();
-         fout.write((const char *) &nstr, sizeof(nstr));
-         fout.write(val[i].c_str(), nstr);
-     }
-
-     fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
-     for (int i = 0; i < n; ++i) {
-         fprintf(stdout, "%s", val[i].c_str());
-         if (i < n - 1) {
-             fprintf(stdout, ", ");
-         }
-     }
-     fprintf(stdout, "]\n");
- }
-
bool gguf_ex_write(const std::string & fname) {
-     std::ofstream fout(fname.c_str(), std::ios::binary);
-
-     {
-         const int32_t magic = GGUF_MAGIC;
-         fout.write((const char *) &magic, sizeof(magic));
-     }
-
-     {
-         const int32_t version = GGUF_VERSION;
-         fout.write((const char *) &version, sizeof(version));
-     }
-
-     // NOTE: these have to match the output below!
-     const int n_tensors = 10;
-     const int n_kv      = 12;
-
-     fout.write((const char *) &n_tensors, sizeof(n_tensors));
-     fout.write((const char *) &n_kv,      sizeof(n_kv));
-
-     fprintf(stdout, "%s: write header\n", __func__);
-
-     // kv data
-     {
-         gguf_ex_write_val< uint8_t>(fout, "some.parameter.uint8",   GGUF_TYPE_UINT8,    0x12);
-         gguf_ex_write_val<  int8_t>(fout, "some.parameter.int8",    GGUF_TYPE_INT8,    -0x13);
-         gguf_ex_write_val<uint16_t>(fout, "some.parameter.uint16",  GGUF_TYPE_UINT16,   0x1234);
-         gguf_ex_write_val< int16_t>(fout, "some.parameter.int16",   GGUF_TYPE_INT16,   -0x1235);
-         gguf_ex_write_val<uint32_t>(fout, "some.parameter.uint32",  GGUF_TYPE_UINT32,   0x12345678);
-         gguf_ex_write_val< int32_t>(fout, "some.parameter.int32",   GGUF_TYPE_INT32,   -0x12345679);
-
-         gguf_ex_write_val<float>      (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
-         gguf_ex_write_val<bool>       (fout, "some.parameter.bool",    GGUF_TYPE_BOOL,    true);
-
-         gguf_ex_write_val<std::string>(fout, "some.parameter.string",  GGUF_TYPE_STRING,  "hello world");
-
-         gguf_ex_write_arr<int16_t>    (fout, "some.parameter.arr.i16", GGUF_TYPE_INT16,   { 1, 2, 3, 4, });
-         gguf_ex_write_arr<float>      (fout, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, { 3.145f, 2.718f, 1.414f, });
-         gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING,  { "hello", "world", "!" });
-     }
-
-     uint64_t offset_tensor = 0;
+     struct gguf_context * ctx = gguf_init_empty();
+
+     gguf_set_val_u8  (ctx, "some.parameter.uint8",    0x12);
+     gguf_set_val_i8  (ctx, "some.parameter.int8",    -0x13);
+     gguf_set_val_u16 (ctx, "some.parameter.uint16",   0x1234);
+     gguf_set_val_i16 (ctx, "some.parameter.int16",   -0x1235);
+     gguf_set_val_u32 (ctx, "some.parameter.uint32",   0x12345678);
+     gguf_set_val_i32 (ctx, "some.parameter.int32",   -0x12345679);
+     gguf_set_val_f32 (ctx, "some.parameter.float32",  0.123456789f);
+     gguf_set_val_bool(ctx, "some.parameter.bool",     true);
+     gguf_set_val_str (ctx, "some.parameter.string",   "hello world");
+
+     gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16,   std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
+     gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
+     gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);

    struct ggml_init_params params = {
        /*.mem_size   =*/ 128ull*1024ull*1024ull,
@@ -157,6 +45,8 @@ bool gguf_ex_write(const std::string & fname) {

    struct ggml_context * ctx_data = ggml_init(params);

+     const int n_tensors = 10;
+
    // tensor infos
    for (int i = 0; i < n_tensors; ++i) {
        const std::string name = "tensor_" + to_string(i);
@@ -178,58 +68,15 @@ bool gguf_ex_write(const std::string & fname) {
            }
        }

-         fprintf(stdout, "%s: tensor: %s, %d dims, ne = [", __func__, name.c_str(), n_dims);
-         for (int j = 0; j < 4; ++j) {
-             fprintf(stdout, "%s%3d", j == 0 ? "" : ", ", (int) cur->ne[j]);
-         }
-         fprintf(stdout, "], offset_tensor = %6" PRIu64 "\n", offset_tensor);
-
-         gguf_ex_write_str(fout, name);
-         gguf_ex_write_i32(fout, n_dims);
-         for (int j = 0; j < n_dims; ++j) {
-             gguf_ex_write_i32(fout, cur->ne[j]);
-         }
-         gguf_ex_write_i32(fout, cur->type);
-         gguf_ex_write_u64(fout, offset_tensor);
-
-         offset_tensor += GGML_PAD(ggml_nbytes(cur), GGUF_DEFAULT_ALIGNMENT);
-     }
-
-     const uint64_t offset_data = GGML_PAD((uint64_t) fout.tellp(), GGUF_DEFAULT_ALIGNMENT);
-
-     fprintf(stdout, "%s: data offset = %" PRIu64 "\n", __func__, offset_data);
-
-     {
-         const size_t pad = offset_data - fout.tellp();
-
-         for (size_t j = 0; j < pad; ++j) {
-             fout.put(0);
-         }
-     }
-
-     for (int i = 0; i < n_tensors; ++i) {
-         fprintf(stdout, "%s: writing tensor %d data\n", __func__, i);
-
-         const std::string name = "tensor_" + to_string(i);
-
-         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name.c_str());
-
-         fout.write((const char *) cur->data, ggml_nbytes(cur));
-
-         {
-             const size_t pad = GGML_PAD(ggml_nbytes(cur), GGUF_DEFAULT_ALIGNMENT) - ggml_nbytes(cur);
-
-             for (size_t j = 0; j < pad; ++j) {
-                 fout.put(0);
-             }
-         }
+         gguf_add_tensor(ctx, cur);
    }

-     fout.close();
+     gguf_write_to_file(ctx, fname.c_str(), false);

    fprintf(stdout, "%s: wrote file '%s;\n", __func__, fname.c_str());

    ggml_free(ctx_data);
+     gguf_free(ctx);

    return true;
}
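Note (not part of the diff): a minimal, self-contained sketch of the new write path shown above, assuming the gguf_*/ggml_* declarations are available through "ggml.h"; the key names, the tensor name and shape, and the "demo.gguf" file name are invented for illustration.

#include "ggml.h"

#include <cstdio>

int main(void) {
    // collect metadata in an in-memory gguf context
    struct gguf_context * gguf = gguf_init_empty();

    gguf_set_val_u32(gguf, "example.count", 42);
    gguf_set_val_str(gguf, "example.name",  "demo");

    // ggml context that owns the tensor data
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16ull*1024ull*1024ull,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_name(t, "tensor_0");
    for (int i = 0; i < 8; ++i) {
        ((float *) t->data)[i] = 0.5f*i;
    }

    // register the tensor, then write header, KV pairs, tensor infos,
    // alignment padding and tensor data in a single call
    gguf_add_tensor(gguf, t);
    gguf_write_to_file(gguf, "demo.gguf", false); // false: write tensor data too

    fprintf(stdout, "wrote demo.gguf\n");

    ggml_free(ctx);
    gguf_free(gguf);

    return 0;
}

This single gguf_write_to_file call covers everything the removed hand-rolled writer did by hand: magic, version, counts, KV section, tensor infos, padding and tensor data.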
@@ -345,8 +192,16 @@ bool gguf_ex_read_1(const std::string & fname) {

        struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);

-         fprintf(stdout, "%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n",
-                 __func__, i, cur->n_dims, cur->name, cur->data);
+         fprintf(stdout, "%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data);
+
+         // print first 10 elements
+         const float * data = (const float *) cur->data;
+
+         printf("%s data[:10] : ", name);
+         for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
+             printf("%f ", data[j]);
+         }
+         printf("\n\n");

        // check data
        {
@@ -369,48 +224,6 @@ bool gguf_ex_read_1(const std::string & fname) {
    return true;
}

- // read just the tensor info and mmap the data in user code
- bool gguf_ex_read_2(const std::string & fname) {
-     struct ggml_context * ctx_data = NULL;
-
-     struct gguf_init_params params = {
-         /*.no_alloc = */ true,
-         /*.ctx      = */ &ctx_data,
-     };
-
-     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
-
-     struct gguf_file file(fname.c_str(), "rb");
-     gguf_mmap data_mmap(&file, 0, false);
-
-     const int n_tensors = gguf_get_n_tensors(ctx);
-
-     for (int i = 0; i < n_tensors; ++i) {
-         const char * name   = gguf_get_tensor_name(ctx, i);
-         const size_t offset = gguf_get_data_offset(ctx) + gguf_get_tensor_offset(ctx, i);
-
-         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
-
-         cur->data = static_cast<char *>(data_mmap.addr) + offset;
-
-         // print first 10 elements
-         const float * data = (const float *) cur->data;
-
-         printf("%s data[:10] : ", name);
-         for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
-             printf("%f ", data[j]);
-         }
-         printf("\n\n");
-     }
-
-     fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
-
-     ggml_free(ctx_data);
-     gguf_free(ctx);
-
-     return true;
- }
-
int main(int argc, char ** argv) {
    if (argc < 3) {
        fprintf(stdout, "usage: %s data.gguf r|w\n", argv[0]);
@@ -427,7 +240,6 @@ int main(int argc, char ** argv) {
    } else if (mode == "r") {
        GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
        GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
-         GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
    } else if (mode == "q") {
        llama_model_quantize_params params = llama_model_quantize_default_params();
        llama_model_quantize(fname.c_str(), "quant.gguf", &params);
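Note (not part of the diff): with gguf_ex_read_2 removed, reading back a file relies on gguf_init_from_file populating a ggml context. A minimal sketch of that usage, under the same assumptions as the write sketch above (declarations via "ggml.h"; "demo.gguf" is the hypothetical file from that sketch):

#include "ggml.h"

#include <cstdio>

int main(void) {
    struct ggml_context * ctx_data = NULL;

    struct gguf_init_params params = {
        /*.no_alloc = */ false,   // also allocate and load the tensor data
        /*.ctx      = */ &ctx_data,
    };

    struct gguf_context * gguf = gguf_init_from_file("demo.gguf", params);
    if (!gguf) {
        fprintf(stderr, "failed to load demo.gguf\n");
        return 1;
    }

    const int n_tensors = gguf_get_n_tensors(gguf);
    for (int i = 0; i < n_tensors; ++i) {
        const char * name = gguf_get_tensor_name(gguf, i);

        struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);

        fprintf(stdout, "tensor[%d]: name = %s, first element = %f\n",
                i, name, ((const float *) cur->data)[0]);
    }

    ggml_free(ctx_data);
    gguf_free(gguf);

    return 0;
}

With no_alloc = false the loader copies the tensor data into ctx_data itself, so no manual mmap/offset bookkeeping is needed; that is the work the removed gguf_ex_read_2 used to do by hand.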