Skip to content

Commit b042b48

Browse files
committed
Zend: use Bison 3.6 feature to generate the syntax errors
Before this commit, yytnamerr was used to tailor the error messages. But it required maintaining an external state (to know whether yytnamerr was called on the unexpected token, or one of the expected ones). Bison 3.6 offers the user a means to build the error messages, by implementing yyreport_syntax_error. Do that. That's more code than before, but much easier to understand.
1 parent 826a745 commit b042b48

File tree

1 file changed

+91
-86
lines changed

1 file changed

+91
-86
lines changed

Zend/zend_language_parser.y

Lines changed: 91 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,9 @@
2828
#include "zend_language_scanner.h"
2929
#include "zend_exceptions.h"
3030

31-
#define YYSIZE_T size_t
32-
#define yytnamerr zend_yytnamerr
33-
static YYSIZE_T zend_yytnamerr(char*, const char*);
34-
3531
#ifdef _MSC_VER
36-
#define YYMALLOC malloc
37-
#define YYFREE free
32+
# define YYMALLOC malloc
33+
# define YYFREE free
3834
#endif
3935
}
4036

@@ -45,7 +41,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
4541
%define api.prefix {zend}
4642
%define api.pure full
4743
%define api.value.type {zend_parser_stack_elem}
48-
%define parse.error verbose
44+
%define parse.error custom
4945
%expect 0
5046

5147
%destructor { zend_ast_destroy($$); } <ast>
@@ -1348,85 +1344,94 @@ isset_variable:
13481344

13491345
%%
13501346

1351-
/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
1352-
quotes and backslashes, so that it's suitable for yyerror. The
1353-
heuristic is that double-quoting is unnecessary unless the string
1354-
contains an apostrophe, a comma, or backslash (other than
1355-
backslash-backslash). YYSTR is taken from yytname. If YYRES is
1356-
null, do not copy; instead, return the length of what the result
1357-
would have been. */
1358-
static YYSIZE_T zend_yytnamerr(char *yyres, const char *yystr)
1347+
static unsigned int umin (unsigned int a, unsigned int b)
13591348
{
1360-
/* CG(parse_error) states:
1361-
* 0 => yyres = NULL, yystr is the unexpected token
1362-
* 1 => yyres = NULL, yystr is one of the expected tokens
1363-
* 2 => yyres != NULL, yystr is the unexpected token
1364-
* 3 => yyres != NULL, yystr is one of the expected tokens
1365-
*/
1366-
if (yyres && CG(parse_error) < 2) {
1367-
CG(parse_error) = 2;
1368-
}
1369-
1370-
if (CG(parse_error) % 2 == 0) {
1371-
/* The unexpected token */
1372-
char buffer[120];
1373-
const unsigned char *end, *str, *tok1 = NULL, *tok2 = NULL;
1374-
unsigned int len = 0, toklen = 0, yystr_len;
1375-
1376-
CG(parse_error)++;
1377-
1378-
if (LANG_SCNG(yy_text)[0] == 0 &&
1379-
LANG_SCNG(yy_leng) == 1 &&
1380-
strcmp(yystr, "\"end of file\"") == 0) {
1381-
if (yyres) {
1382-
yystpcpy(yyres, "end of file");
1383-
}
1384-
return sizeof("end of file")-1;
1385-
}
1386-
1387-
str = LANG_SCNG(yy_text);
1388-
end = memchr(str, '\n', LANG_SCNG(yy_leng));
1389-
yystr_len = (unsigned int)strlen(yystr);
1390-
1391-
if ((tok1 = memchr(yystr, '(', yystr_len)) != NULL
1392-
&& (tok2 = zend_memrchr(yystr, ')', yystr_len)) != NULL) {
1393-
toklen = (tok2 - tok1) + 1;
1394-
} else {
1395-
tok1 = tok2 = NULL;
1396-
toklen = 0;
1397-
}
1398-
1399-
if (end == NULL) {
1400-
len = LANG_SCNG(yy_leng) > 30 ? 30 : LANG_SCNG(yy_leng);
1401-
} else {
1402-
len = (end - str) > 30 ? 30 : (end - str);
1403-
}
1404-
if (yyres) {
1405-
if (toklen) {
1406-
snprintf(buffer, sizeof(buffer), "'%.*s' %.*s", len, str, toklen, tok1);
1407-
} else {
1408-
snprintf(buffer, sizeof(buffer), "'%.*s'", len, str);
1409-
}
1410-
yystpcpy(yyres, buffer);
1411-
}
1412-
return len + (toklen ? toklen + 1 : 0) + 2;
1413-
}
1414-
1415-
/* One of the expected tokens */
1416-
if (!yyres) {
1417-
return strlen(yystr) - (*yystr == '"' ? 2 : 0);
1418-
}
1419-
1420-
if (*yystr == '"') {
1421-
YYSIZE_T yyn = 0;
1422-
const char *yyp = yystr;
1349+
return a < b ? a : b;
1350+
}
14231351

1424-
for (; *++yyp != '"'; ++yyn) {
1425-
yyres[yyn] = *yyp;
1426-
}
1427-
yyres[yyn] = '\0';
1428-
return yyn;
1429-
}
1430-
yystpcpy(yyres, yystr);
1431-
return strlen(yystr);
1352+
int
1353+
yyreport_syntax_error (const yypcontext_t *ctx)
1354+
{
1355+
int res = 0;
1356+
1357+
// Report the unexpected token.
1358+
char unexpected[120];
1359+
{
1360+
yysymbol_kind_t unexp = yypcontext_token (ctx);
1361+
if (unexp == YYSYMBOL_YYEOF) {
1362+
strncpy(unexpected, yysymbol_name (unexp), sizeof(unexpected));
1363+
} else if (unexp != YYSYMBOL_YYEMPTY) {
1364+
// Include the lexeme.
1365+
const unsigned char *str = LANG_SCNG(yy_text);
1366+
const unsigned char *end = memchr(str, '\n', LANG_SCNG(yy_leng));
1367+
const unsigned int len =
1368+
umin(30, end != NULL ? end - str : LANG_SCNG(yy_leng));
1369+
1370+
// Maybe include the token name.
1371+
// Strings are like "<<= (T_SL_EQUAL)": extract the part in parens.
1372+
const char *unexp_str = yysymbol_name (unexp);
1373+
const unsigned int unexp_len = (unsigned int)strlen(unexp_str);
1374+
const unsigned char *tok1 = memchr(unexp_str, '(', unexp_len);
1375+
const unsigned char *tok2 = zend_memrchr(unexp_str, ')', unexp_len);
1376+
const unsigned int toklen = tok1 != NULL && tok2 != NULL ? tok2 - tok1 + 1 : 0;
1377+
1378+
if (toklen) {
1379+
snprintf(unexpected, sizeof(unexpected), "'%.*s' %.*s", len, str, toklen, tok1);
1380+
} else {
1381+
snprintf(unexpected, sizeof(unexpected), "'%.*s'", len, str);
1382+
}
1383+
}
1384+
}
1385+
1386+
enum { TOKENMAX = 4 };
1387+
const char *expected[TOKENMAX];
1388+
unsigned int num_expected = 0;
1389+
{
1390+
yysymbol_kind_t exp[TOKENMAX];
1391+
int n = yypcontext_expected_tokens (ctx, exp, TOKENMAX);
1392+
if (n < 0)
1393+
// Forward errors to yyparse.
1394+
res = n;
1395+
else
1396+
{
1397+
num_expected = n;
1398+
for (int i = 0; i < num_expected; ++i)
1399+
expected[i] = yysymbol_name (exp[i]);
1400+
}
1401+
}
1402+
1403+
char buff[1024];
1404+
switch (num_expected)
1405+
{
1406+
case 0:
1407+
snprintf(buff, sizeof buff,
1408+
"syntax error, unexpected %s", unexpected);
1409+
break;
1410+
1411+
case 1:
1412+
snprintf(buff, sizeof buff,
1413+
"syntax error, unexpected %s, expecting %s",
1414+
unexpected, expected[0]);
1415+
break;
1416+
1417+
case 2:
1418+
snprintf(buff, sizeof buff,
1419+
"syntax error, unexpected %s, expecting %s or %s",
1420+
unexpected, expected[0], expected[1]);
1421+
break;
1422+
1423+
case 3:
1424+
snprintf(buff, sizeof buff,
1425+
"syntax error, unexpected %s, expecting %s or %s or %s",
1426+
unexpected, expected[0], expected[1], expected[2]);
1427+
break;
1428+
1429+
case 4:
1430+
snprintf(buff, sizeof buff,
1431+
"syntax error, unexpected %s, expecting %s or %s or %s or %s",
1432+
unexpected, expected[0], expected[1], expected[2], expected[3]);
1433+
break;
1434+
}
1435+
yyerror (buff);
1436+
return res;
14321437
}

0 commit comments

Comments
 (0)