Skip to content

Commit b2f9cdb

Browse files
committed
Syntax errors caused by unclosed {, [, ( mention specific location
Aside from a few very specific syntax errors for which detailed exceptions are thrown, generally PHP just emits the default error messages generated by bison on syntax error. These messages are very uninformative; they just say "Unexpected ... at line ...". This is most problematic with constructs which can span an arbitrary number of lines, such as blocks of code delimited by { }, 'if' conditions delimited by ( ), and so on. If a closing delimiter is missed, the block will run for the entire remainder of the source file (which could be thousands of lines), and then at the end, a parse error will be thrown with the dreaded words: "Unexpected end of file". Therefore, track the positions of opening and closing delimiters and ensure that they match up correctly. If any mismatch or missing delimiter is detected, immediately throw a parse error which points the user to the offending line. This is best done in the *lexer* and not in the parser.
1 parent fcc6da3 commit b2f9cdb

9 files changed

+193
-16
lines changed

Zend/tests/bug60099.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ namespace foo {
77

88
?>
99
--EXPECTF--
10-
Parse error: syntax error, unexpected end of file in %s on line %d
10+
Parse error: Unclosed '{' on line 2 in %s on line %d

Zend/tests/eval_parse_error_with_doc_comment.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ EOC
1212

1313
?>
1414
--EXPECTF--
15-
Parse error: syntax error, unexpected end of file in %s(%d) : eval()'d code on line %d
15+
Parse error: Unclosed '{' in %s(%d) : eval()'d code on line %d

Zend/tests/require_parse_exception.phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ var_dump("\u{ffffff}");');
4343
?>
4444
--EXPECT--
4545
Deprecated: Directive 'allow_url_include' is deprecated in Unknown on line 0
46-
syntax error, unexpected end of file on line 2
47-
syntax error, unexpected end of file on line 3
46+
Unclosed '{' on line 2
47+
Unclosed '{' on line 3
4848
syntax error, unexpected end of file, expecting '(' on line 2
4949
Invalid numeric literal on line 2
5050
Invalid UTF-8 codepoint escape sequence on line 2

Zend/zend_globals.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ struct _zend_php_scanner_globals {
285285
int yy_state;
286286
zend_stack state_stack;
287287
zend_ptr_stack heredoc_label_stack;
288+
zend_stack nest_location_stack; /* for syntax error reporting */
288289
zend_bool heredoc_scan_ahead;
289290
int heredoc_indentation;
290291
zend_bool heredoc_indentation_uses_spaces;

Zend/zend_language_scanner.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ typedef struct _zend_lex_state {
3030
int yy_state;
3131
zend_stack state_stack;
3232
zend_ptr_stack heredoc_label_stack;
33+
zend_stack nest_location_stack; /* for syntax error reporting */
3334

3435
zend_file_handle *in;
3536
uint32_t lineno;
@@ -63,6 +64,12 @@ typedef struct _zend_heredoc_label {
6364
zend_bool indentation_uses_spaces;
6465
} zend_heredoc_label;
6566

67+
/* Track locations of unclosed {, [, (, etc. for better syntax error reporting */
68+
typedef struct _zend_nest_location {
69+
char *text; /* points into yytext */
70+
int lineno;
71+
} zend_nest_location;
72+
6673
BEGIN_EXTERN_C()
6774
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state);
6875
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);

Zend/zend_language_scanner.l

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ void startup_scanner(void)
192192
CG(doc_comment) = NULL;
193193
CG(extra_fn_flags) = 0;
194194
zend_stack_init(&SCNG(state_stack), sizeof(int));
195+
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
195196
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
196197
SCNG(heredoc_scan_ahead) = 0;
197198
}
@@ -205,6 +206,7 @@ void shutdown_scanner(void)
205206
CG(parse_error) = 0;
206207
RESET_DOC_COMMENT();
207208
zend_stack_destroy(&SCNG(state_stack));
209+
zend_stack_destroy(&SCNG(nest_location_stack));
208210
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
209211
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
210212
SCNG(heredoc_scan_ahead) = 0;
@@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
223225
lex_state->state_stack = SCNG(state_stack);
224226
zend_stack_init(&SCNG(state_stack), sizeof(int));
225227

228+
lex_state->nest_location_stack = SCNG(nest_location_stack);
229+
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
230+
226231
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
227232
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
228233

@@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
258263
zend_stack_destroy(&SCNG(state_stack));
259264
SCNG(state_stack) = lex_state->state_stack;
260265

266+
zend_stack_destroy(&SCNG(nest_location_stack));
267+
SCNG(nest_location_stack) = lex_state->nest_location_stack;
268+
261269
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
262270
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
263271
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
@@ -1250,6 +1258,69 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
12501258
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
12511259
}
12521260

1261+
/* Check that { }, [ ], ( ) are nested correctly */
1262+
static void report_bad_nesting(char opening, int opening_lineno, char closing)
1263+
{
1264+
char buf[256];
1265+
int used = 0;
1266+
1267+
if (opening == '$') {
1268+
used = snprintf(buf, sizeof(buf), "Unclosed '${'");
1269+
} else {
1270+
used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);
1271+
}
1272+
1273+
if (opening_lineno != CG(zend_lineno)) {
1274+
used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
1275+
}
1276+
1277+
if (closing) { /* 'closing' will be 0 if at end of file */
1278+
used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
1279+
}
1280+
1281+
zend_throw_exception(zend_ce_parse_error, buf, 0);
1282+
}
1283+
1284+
static void enter_nesting(char *location)
1285+
{
1286+
zend_nest_location nest_loc = {location, CG(zend_lineno)};
1287+
zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
1288+
}
1289+
1290+
static int exit_nesting(char *location)
1291+
{
1292+
if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
1293+
zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", *location);
1294+
return -1;
1295+
}
1296+
1297+
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
1298+
char opening = *nest_loc->text;
1299+
char closing = *location;
1300+
1301+
if ((opening == '{' && closing != '}') ||
1302+
(opening == '[' && closing != ']') ||
1303+
(opening == '(' && closing != ')') ||
1304+
(opening == '$' && closing != '}')) { /* for ${ */
1305+
report_bad_nesting(opening, nest_loc->lineno, closing);
1306+
return -1;
1307+
}
1308+
1309+
zend_stack_del_top(&SCNG(nest_location_stack));
1310+
return 0;
1311+
}
1312+
1313+
static int check_nesting_at_end()
1314+
{
1315+
if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
1316+
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
1317+
report_bad_nesting(*nest_loc->text, nest_loc->lineno, 0);
1318+
return -1;
1319+
}
1320+
1321+
return 0;
1322+
}
1323+
12531324
#define PARSER_MODE() \
12541325
EXPECTED(elem != NULL)
12551326

@@ -1277,6 +1348,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
12771348
goto emit_token; \
12781349
} while (0)
12791350

1351+
#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
1352+
if (exit_nesting(yytext) && PARSER_MODE()) { \
1353+
RETURN_TOKEN(T_ERROR); \
1354+
} else { \
1355+
RETURN_TOKEN(_token); \
1356+
} \
1357+
} while(0)
1358+
1359+
#define RETURN_END_TOKEN do { \
1360+
if (check_nesting_at_end() && PARSER_MODE()) { \
1361+
RETURN_TOKEN(T_ERROR); \
1362+
} else { \
1363+
RETURN_TOKEN(END); \
1364+
} \
1365+
} while (0)
1366+
12801367
int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
12811368
{
12821369
int token;
@@ -1771,28 +1858,35 @@ NEWLINE ("\r"|"\n"|"\r\n")
17711858
}
17721859

17731860
<ST_IN_SCRIPTING>{TOKENS} {
1774-
RETURN_TOKEN(yytext[0]);
1861+
if (yytext[0] == ']' || yytext[0] == ')') {
1862+
/* Check that ] and ) match up properly with a preceding [ or ( */
1863+
RETURN_EXIT_NESTING_TOKEN(yytext[0]);
1864+
} else {
1865+
if (yytext[0] == '[' || yytext[0] == '(') enter_nesting(yytext);
1866+
RETURN_TOKEN(yytext[0]);
1867+
}
17751868
}
17761869

17771870

17781871
<ST_IN_SCRIPTING>"{" {
17791872
yy_push_state(ST_IN_SCRIPTING);
1873+
enter_nesting(yytext);
17801874
RETURN_TOKEN('{');
17811875
}
17821876

17831877

17841878
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
17851879
yy_push_state(ST_LOOKING_FOR_VARNAME);
1880+
enter_nesting(yytext);
17861881
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
17871882
}
17881883

1789-
17901884
<ST_IN_SCRIPTING>"}" {
17911885
RESET_DOC_COMMENT();
17921886
if (!zend_stack_is_empty(&SCNG(state_stack))) {
17931887
yy_pop_state();
17941888
}
1795-
RETURN_TOKEN('}');
1889+
RETURN_EXIT_NESTING_TOKEN('}');
17961890
}
17971891

17981892

@@ -2088,7 +2182,7 @@ string:
20882182

20892183
<INITIAL>{ANY_CHAR} {
20902184
if (YYCURSOR > YYLIMIT) {
2091-
RETURN_TOKEN(END);
2185+
RETURN_END_TOKEN;
20922186
}
20932187

20942188
inline_char_handler:
@@ -2569,6 +2663,7 @@ skip_escape_conversion:
25692663
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
25702664
yy_push_state(ST_IN_SCRIPTING);
25712665
yyless(1);
2666+
enter_nesting(yytext);
25722667
RETURN_TOKEN(T_CURLY_OPEN);
25732668
}
25742669
@@ -2593,7 +2688,7 @@ skip_escape_conversion:
25932688
}
25942689
25952690
if (YYCURSOR > YYLIMIT) {
2596-
RETURN_TOKEN(END);
2691+
RETURN_END_TOKEN;
25972692
}
25982693
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
25992694
YYCURSOR++;
@@ -2640,7 +2735,7 @@ double_quotes_scan_done:
26402735
26412736
<ST_BACKQUOTE>{ANY_CHAR} {
26422737
if (YYCURSOR > YYLIMIT) {
2643-
RETURN_TOKEN(END);
2738+
RETURN_END_TOKEN;
26442739
}
26452740
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
26462741
YYCURSOR++;
@@ -2689,7 +2784,7 @@ double_quotes_scan_done:
26892784
int newline = 0, indentation = 0, spacing = 0;
26902785
26912786
if (YYCURSOR > YYLIMIT) {
2692-
RETURN_TOKEN(END);
2787+
RETURN_END_TOKEN;
26932788
}
26942789
26952790
YYCURSOR--;
@@ -2813,7 +2908,7 @@ heredoc_scan_done:
28132908
int newline = 0, indentation = 0, spacing = -1;
28142909
28152910
if (YYCURSOR > YYLIMIT) {
2816-
RETURN_TOKEN(END);
2911+
RETURN_END_TOKEN;
28172912
}
28182913
28192914
YYCURSOR--;
@@ -2901,7 +2996,7 @@ nowdoc_scan_done:
29012996
29022997
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
29032998
if (YYCURSOR > YYLIMIT) {
2904-
RETURN_TOKEN(END);
2999+
RETURN_END_TOKEN;
29053000
}
29063001
29073002
RETURN_TOKEN(T_BAD_CHARACTER);

ext/tokenizer/tests/token_get_all_variation17.phpt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ try {
3232
} catch(Exception $e) {
3333
echo "caught exception:";
3434
}
35+
}
3536
?>';
3637
$tokens = token_get_all($source);
3738
var_dump($tokens);
@@ -40,7 +41,7 @@ echo "Done"
4041
?>
4142
--EXPECTF--
4243
*** Testing token_get_all() : with exception keywords ***
43-
array(81) {
44+
array(83) {
4445
[0]=>
4546
array(3) {
4647
[0]=>
@@ -601,13 +602,25 @@ array(81) {
601602
int(14)
602603
}
603604
[80]=>
605+
string(1) "}"
606+
[81]=>
604607
array(3) {
605608
[0]=>
606609
int(%d)
607610
[1]=>
608-
string(2) "?>"
611+
string(1) "
612+
"
609613
[2]=>
610614
int(15)
611615
}
616+
[82]=>
617+
array(3) {
618+
[0]=>
619+
int(%d)
620+
[1]=>
621+
string(2) "?>"
622+
[2]=>
623+
int(16)
624+
}
612625
}
613626
Done

ext/tokenizer/tokenizer.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
392392
array_init(return_value);
393393

394394
while ((token_type = lex_scan(&token, NULL))) {
395+
ZEND_ASSERT(token_type != T_ERROR);
396+
395397
add_token(
396398
return_value, token_type, zendtext, zendleng, token_line,
397399
token_class, &interned_strings);
@@ -408,7 +410,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
408410
&& --need_tokens == 0
409411
) {
410412
/* fetch the rest into a T_INLINE_HTML */
411-
if (zendcursor != zendlimit) {
413+
if (zendcursor < zendlimit) {
412414
add_token(
413415
return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor,
414416
token_line, token_class, &interned_strings);

0 commit comments

Comments
 (0)