@@ -6903,10 +6903,13 @@ struct llama_grammar_candidate {
6903
6903
// pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
6904
6904
static std::pair<std::vector<uint32_t >, llama_partial_utf8> decode_utf8 (
6905
6905
const char * src,
6906
+ size_t n_src,
6906
6907
llama_partial_utf8 partial_start) {
6907
6908
static const int lookup[] = { 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 2 , 2 , 3 , 4 };
6908
6909
const char * pos = src;
6909
6910
std::vector<uint32_t > code_points;
6911
+ // common english strings have the same number of codepoints and bytes.
6912
+ code_points.reserve (n_src);
6910
6913
uint32_t value = partial_start.value ;
6911
6914
int n_remain = partial_start.n_remain ;
6912
6915
@@ -6957,6 +6960,13 @@ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
6957
6960
return std::make_pair (std::move (code_points), llama_partial_utf8{ value, n_remain });
6958
6961
}
6959
6962
6963
+ static std::pair<std::vector<uint32_t >, llama_partial_utf8> decode_utf8 (
6964
+ std::string src,
6965
+ llama_partial_utf8 partial_start
6966
+ ) {
6967
+ return decode_utf8 (src.c_str (), src.size (), partial_start);
6968
+ }
6969
+
6960
6970
// returns true iff pos points to the end of one of the definitions of a rule
6961
6971
static bool llama_grammar_is_end_of_sequence (const llama_grammar_element * pos) {
6962
6972
switch (pos->type ) {
@@ -7580,7 +7590,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
7580
7590
} else if (piece.empty () || piece[0 ] == 0 ) {
7581
7591
candidates->data [i].logit = -INFINITY;
7582
7592
} else {
7583
- candidates_decoded.push_back (decode_utf8 (piece. c_str () , grammar->partial_utf8 ));
7593
+ candidates_decoded.push_back (decode_utf8 (piece, grammar->partial_utf8 ));
7584
7594
candidates_grammar.push_back ({ i, candidates_decoded.back ().first .data (), candidates_decoded.back ().second });
7585
7595
}
7586
7596
}
@@ -7787,7 +7797,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
7787
7797
const std::string piece = llama_token_to_piece (ctx, token);
7788
7798
7789
7799
// Note terminating 0 in decoded string
7790
- const auto decoded = decode_utf8 (piece. c_str () , grammar->partial_utf8 );
7800
+ const auto decoded = decode_utf8 (piece, grammar->partial_utf8 );
7791
7801
const auto & code_points = decoded.first ;
7792
7802
for (auto it = code_points.begin (), end = code_points.end () - 1 ; it != end; ++it) {
7793
7803
grammar->stacks = llama_grammar_accept (grammar->rules , grammar->stacks , *it);
0 commit comments