1
1
#include " llama.h"
2
- #include < assert.h >
3
- #include < math.h >
2
+ #include < cassert >
3
+ #include < cmath >
4
4
#include < numeric>
5
5
#include < cassert>
6
6
#include < iostream>
7
7
#include < vector>
8
8
#include < algorithm>
9
9
10
- #undef assert
11
- #define assert (__expr ) do { if (!(__expr)) { printf (" %s:%d (%s) %s\n " , __FILE__, __LINE__, __func__, #__expr); exit (1 ); } } while (0 )
12
10
13
11
void dump (const llama_token_data_array * candidates) {
14
12
for (size_t i = 0 ; i < candidates->size ; i++) {
@@ -22,19 +20,28 @@ void dump(const llama_token_data_array * candidates) {
22
20
void test_top_k (const std::vector<float > & probs,
23
21
const std::vector<float > & expected_probs,
24
22
int k) {
23
+ printf (" %s:%d (%s)\n " , __FILE__, __LINE__, __func__);
24
+ fflush (stdout);
25
25
size_t n_vocab = probs.size ();
26
26
std::vector<llama_token_data> candidates;
27
27
candidates.reserve (n_vocab);
28
28
for (llama_token token_id = 0 ; token_id < (llama_token)n_vocab; token_id++) {
29
- float logit = log (probs[token_id]);
29
+ printf (" %s:%d (%s) token_id: %d\n " , __FILE__, __LINE__, __func__, token_id);
30
+ fflush (stdout);
31
+ float logit = std::log (probs[token_id]);
30
32
candidates.emplace_back (llama_token_data{token_id, logit, 0 .0f });
31
33
}
34
+ printf (" %s:%d (%s)\n " , __FILE__, __LINE__, __func__);
35
+ fflush (stdout);
32
36
33
37
llama_token_data_array candidates_p = { candidates.data (), candidates.size (), false };
38
+ printf (" %s:%d (%s)\n " , __FILE__, __LINE__, __func__);
39
+ fflush (stdout);
34
40
llama_sample_softmax (nullptr , &candidates_p);
35
- // DUMP(&candidates_p);
41
+ DUMP (&candidates_p);
36
42
llama_sample_top_k (nullptr , &candidates_p, k);
37
- // DUMP(&candidates_p);
43
+ DUMP (&candidates_p);
44
+ fflush (stdout);
38
45
39
46
assert (candidates_p.size == expected_probs.size ());
40
47
for (size_t i = 0 ; i < candidates_p.size ; i++) {
@@ -57,9 +64,9 @@ void test_top_p(const std::vector<float> & probs,
57
64
58
65
llama_token_data_array candidates_p = { candidates.data (), candidates.size (), false };
59
66
llama_sample_softmax (nullptr , &candidates_p);
60
- // DUMP(&candidates_p);
67
+ DUMP (&candidates_p);
61
68
llama_sample_top_p (nullptr , &candidates_p, p);
62
- // DUMP(&candidates_p);
69
+ DUMP (&candidates_p);
63
70
64
71
assert (candidates_p.size == expected_probs.size ());
65
72
for (size_t i = 0 ; i < candidates_p.size ; i++) {
@@ -80,9 +87,9 @@ void test_tfs(const std::vector<float> & probs,
80
87
}
81
88
82
89
llama_token_data_array candidates_p = { candidates.data (), candidates.size (), false };
83
- // DUMP(&candidates_p);
90
+ DUMP (&candidates_p);
84
91
llama_sample_tail_free (nullptr , &candidates_p, z);
85
- // DUMP(&candidates_p);
92
+ DUMP (&candidates_p);
86
93
87
94
assert (candidates_p.size == expected_probs.size ());
88
95
for (size_t i = 0 ; i < candidates_p.size ; i++) {
@@ -103,9 +110,9 @@ void test_typical(const std::vector<float> & probs,
103
110
}
104
111
105
112
llama_token_data_array candidates_p = { candidates.data (), candidates.size (), false };
106
- // DUMP(&candidates_p);
113
+ DUMP (&candidates_p);
107
114
llama_sample_typical (nullptr , &candidates_p, p);
108
- // DUMP(&candidates_p);
115
+ DUMP (&candidates_p);
109
116
110
117
assert (candidates_p.size == expected_probs.size ());
111
118
for (size_t i = 0 ; i < candidates_p.size ; i++) {
@@ -172,6 +179,8 @@ void test_frequency_presence_penalty(
172
179
}
173
180
174
181
int main (void ) {
182
+ printf (" main\n " );
183
+ fflush (stdout);
175
184
test_top_k ({0.1 , 0.2 , 0.3 , 0.4 }, {0.4 }, 1 );
176
185
test_top_k ({0.1 , 0.2 , 0.3 , 0.4 }, {0.4 , 0.3 , 0.2 }, 3 );
177
186
0 commit comments