Skip to content

Commit 80598f1

Browse files
alexdowadnikic
authored andcommitted
Syntax errors caused by unclosed {, [, ( mention specific location
Aside from a few very specific syntax errors for which detailed exceptions are thrown, generally PHP just emits the default error messages generated by bison on syntax error. These messages are very uninformative; they just say "Unexpected ... at line ...". This is most problematic with constructs which can span an arbitrary number of lines, such as blocks of code delimited by { }, 'if' conditions delimited by ( ), and so on. If a closing delimiter is missed, the block will run for the entire remainder of the source file (which could be thousands of lines), and then at the end, a parse error will be thrown with the dreaded words: "Unexpected end of file". Therefore, track the positions of opening and closing delimiters and ensure that they match up correctly. If any mismatch or missing delimiter is detected, immediately throw a parse error which points the user to the offending line. This is best done in the *lexer* and not in the parser. Thanks to Nikita Popov and George Peter Banyard for suggesting improvements. Fixes bug #79368. Closes GH-5364.
1 parent d4471c6 commit 80598f1

10 files changed

+192
-17
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ PHP NEWS
1818
(Nikita)
1919
. Fixed bug #79462 (method_exists and property_exists incoherent behavior).
2020
(cmb)
21+
. Fixed bug #79368 ("Unexpected end of file" is not an acceptable error
22+
message). (Alex Dowad)
2123

2224
- CURL:
2325
. Bumped required libcurl version to 7.29.0. (cmb)

Zend/tests/bug60099.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ namespace foo {
77

88
?>
99
--EXPECTF--
10-
Parse error: syntax error, unexpected end of file in %s on line %d
10+
Parse error: Unclosed '{' on line 2 in %s on line %d

Zend/tests/eval_parse_error_with_doc_comment.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ EOC
1212

1313
?>
1414
--EXPECTF--
15-
Parse error: syntax error, unexpected end of file in %s(%d) : eval()'d code on line %d
15+
Parse error: Unclosed '{' in %s(%d) : eval()'d code on line %d

Zend/tests/require_parse_exception.phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ var_dump("\u{ffffff}");');
4343
?>
4444
--EXPECT--
4545
Deprecated: Directive 'allow_url_include' is deprecated in Unknown on line 0
46-
syntax error, unexpected end of file on line 2
47-
syntax error, unexpected end of file on line 3
46+
Unclosed '{' on line 2
47+
Unclosed '{' on line 3
4848
syntax error, unexpected end of file, expecting '(' on line 2
4949
Invalid numeric literal on line 2
5050
Invalid UTF-8 codepoint escape sequence on line 2

Zend/zend_globals.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ struct _zend_php_scanner_globals {
285285
int yy_state;
286286
zend_stack state_stack;
287287
zend_ptr_stack heredoc_label_stack;
288+
zend_stack nest_location_stack; /* for syntax error reporting */
288289
zend_bool heredoc_scan_ahead;
289290
int heredoc_indentation;
290291
zend_bool heredoc_indentation_uses_spaces;

Zend/zend_language_scanner.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ typedef struct _zend_lex_state {
3030
int yy_state;
3131
zend_stack state_stack;
3232
zend_ptr_stack heredoc_label_stack;
33+
zend_stack nest_location_stack; /* for syntax error reporting */
3334

3435
zend_file_handle *in;
3536
uint32_t lineno;
@@ -63,6 +64,12 @@ typedef struct _zend_heredoc_label {
6364
zend_bool indentation_uses_spaces;
6465
} zend_heredoc_label;
6566

67+
/* Track locations of unclosed {, [, (, etc. for better syntax error reporting */
68+
typedef struct _zend_nest_location {
69+
char text;
70+
int lineno;
71+
} zend_nest_location;
72+
6673
BEGIN_EXTERN_C()
6774
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state);
6875
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);

Zend/zend_language_scanner.l

Lines changed: 103 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ void startup_scanner(void)
192192
CG(doc_comment) = NULL;
193193
CG(extra_fn_flags) = 0;
194194
zend_stack_init(&SCNG(state_stack), sizeof(int));
195+
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
195196
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
196197
SCNG(heredoc_scan_ahead) = 0;
197198
}
@@ -205,6 +206,7 @@ void shutdown_scanner(void)
205206
CG(parse_error) = 0;
206207
RESET_DOC_COMMENT();
207208
zend_stack_destroy(&SCNG(state_stack));
209+
zend_stack_destroy(&SCNG(nest_location_stack));
208210
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
209211
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
210212
SCNG(heredoc_scan_ahead) = 0;
@@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
223225
lex_state->state_stack = SCNG(state_stack);
224226
zend_stack_init(&SCNG(state_stack), sizeof(int));
225227

228+
lex_state->nest_location_stack = SCNG(nest_location_stack);
229+
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
230+
226231
lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
227232
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
228233

@@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
258263
zend_stack_destroy(&SCNG(state_stack));
259264
SCNG(state_stack) = lex_state->state_stack;
260265

266+
zend_stack_destroy(&SCNG(nest_location_stack));
267+
SCNG(nest_location_stack) = lex_state->nest_location_stack;
268+
261269
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
262270
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
263271
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
@@ -1250,6 +1258,63 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
12501258
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
12511259
}
12521260

1261+
/* Check that { }, [ ], ( ) are nested correctly */
1262+
static void report_bad_nesting(char opening, int opening_lineno, char closing)
1263+
{
1264+
char buf[256];
1265+
size_t used = 0;
1266+
1267+
used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);
1268+
1269+
if (opening_lineno != CG(zend_lineno)) {
1270+
used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
1271+
}
1272+
1273+
if (closing) { /* 'closing' will be 0 if at end of file */
1274+
used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
1275+
}
1276+
1277+
zend_throw_exception(zend_ce_parse_error, buf, 0);
1278+
}
1279+
1280+
static void enter_nesting(char opening)
1281+
{
1282+
zend_nest_location nest_loc = {opening, CG(zend_lineno)};
1283+
zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
1284+
}
1285+
1286+
static int exit_nesting(char closing)
1287+
{
1288+
if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
1289+
zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
1290+
return -1;
1291+
}
1292+
1293+
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
1294+
char opening = nest_loc->text;
1295+
1296+
if ((opening == '{' && closing != '}') ||
1297+
(opening == '[' && closing != ']') ||
1298+
(opening == '(' && closing != ')')) {
1299+
report_bad_nesting(opening, nest_loc->lineno, closing);
1300+
return -1;
1301+
}
1302+
1303+
zend_stack_del_top(&SCNG(nest_location_stack));
1304+
return 0;
1305+
}
1306+
1307+
static int check_nesting_at_end()
1308+
{
1309+
if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
1310+
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
1311+
report_bad_nesting(nest_loc->text, nest_loc->lineno, 0);
1312+
return -1;
1313+
}
1314+
1315+
return 0;
1316+
}
1317+
12531318
#define PARSER_MODE() \
12541319
EXPECTED(elem != NULL)
12551320

@@ -1277,6 +1342,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
12771342
goto emit_token; \
12781343
} while (0)
12791344

1345+
#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
1346+
if (exit_nesting(_token) && PARSER_MODE()) { \
1347+
RETURN_TOKEN(T_ERROR); \
1348+
} else { \
1349+
RETURN_TOKEN(_token); \
1350+
} \
1351+
} while(0)
1352+
1353+
#define RETURN_END_TOKEN do { \
1354+
if (check_nesting_at_end() && PARSER_MODE()) { \
1355+
RETURN_TOKEN(T_ERROR); \
1356+
} else { \
1357+
RETURN_TOKEN(END); \
1358+
} \
1359+
} while (0)
1360+
12801361
int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
12811362
{
12821363
int token;
@@ -1297,7 +1378,7 @@ BNUM "0b"[01]+(_[01]+)*
12971378
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
12981379
WHITESPACE [ \n\r\t]+
12991380
TABS_AND_SPACES [ \t]*
1300-
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
1381+
TOKENS [;:,.|^&+-/*=%!~$<>?@]
13011382
ANY_CHAR [^]
13021383
NEWLINE ("\r"|"\n"|"\r\n")
13031384
@@ -1770,29 +1851,40 @@ NEWLINE ("\r"|"\n"|"\r\n")
17701851
RETURN_TOKEN(T_SR);
17711852
}
17721853

1854+
<ST_IN_SCRIPTING>"]"|")" {
1855+
/* Check that ] and ) match up properly with a preceding [ or ( */
1856+
RETURN_EXIT_NESTING_TOKEN(yytext[0]);
1857+
}
1858+
1859+
<ST_IN_SCRIPTING>"["|"(" {
1860+
enter_nesting(yytext[0]);
1861+
RETURN_TOKEN(yytext[0]);
1862+
}
1863+
17731864
<ST_IN_SCRIPTING>{TOKENS} {
17741865
RETURN_TOKEN(yytext[0]);
17751866
}
17761867

17771868

17781869
<ST_IN_SCRIPTING>"{" {
17791870
yy_push_state(ST_IN_SCRIPTING);
1871+
enter_nesting('{');
17801872
RETURN_TOKEN('{');
17811873
}
17821874

17831875

17841876
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
17851877
yy_push_state(ST_LOOKING_FOR_VARNAME);
1878+
enter_nesting('{');
17861879
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
17871880
}
17881881

1789-
17901882
<ST_IN_SCRIPTING>"}" {
17911883
RESET_DOC_COMMENT();
17921884
if (!zend_stack_is_empty(&SCNG(state_stack))) {
17931885
yy_pop_state();
17941886
}
1795-
RETURN_TOKEN('}');
1887+
RETURN_EXIT_NESTING_TOKEN('}');
17961888
}
17971889

17981890

@@ -2088,7 +2180,7 @@ string:
20882180

20892181
<INITIAL>{ANY_CHAR} {
20902182
if (YYCURSOR > YYLIMIT) {
2091-
RETURN_TOKEN(END);
2183+
RETURN_END_TOKEN;
20922184
}
20932185

20942186
inline_char_handler:
@@ -2165,7 +2257,7 @@ inline_char_handler:
21652257
RETURN_TOKEN(']');
21662258
}
21672259

2168-
<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
2260+
<ST_VAR_OFFSET>{TOKENS}|[[(){}"`] {
21692261
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
21702262
RETURN_TOKEN(yytext[0]);
21712263
}
@@ -2569,6 +2661,7 @@ skip_escape_conversion:
25692661
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
25702662
yy_push_state(ST_IN_SCRIPTING);
25712663
yyless(1);
2664+
enter_nesting('{');
25722665
RETURN_TOKEN(T_CURLY_OPEN);
25732666
}
25742667
@@ -2593,7 +2686,7 @@ skip_escape_conversion:
25932686
}
25942687
25952688
if (YYCURSOR > YYLIMIT) {
2596-
RETURN_TOKEN(END);
2689+
RETURN_END_TOKEN;
25972690
}
25982691
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
25992692
YYCURSOR++;
@@ -2640,7 +2733,7 @@ double_quotes_scan_done:
26402733
26412734
<ST_BACKQUOTE>{ANY_CHAR} {
26422735
if (YYCURSOR > YYLIMIT) {
2643-
RETURN_TOKEN(END);
2736+
RETURN_END_TOKEN;
26442737
}
26452738
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
26462739
YYCURSOR++;
@@ -2689,7 +2782,7 @@ double_quotes_scan_done:
26892782
int newline = 0, indentation = 0, spacing = 0;
26902783
26912784
if (YYCURSOR > YYLIMIT) {
2692-
RETURN_TOKEN(END);
2785+
RETURN_END_TOKEN;
26932786
}
26942787
26952788
YYCURSOR--;
@@ -2813,7 +2906,7 @@ heredoc_scan_done:
28132906
int newline = 0, indentation = 0, spacing = -1;
28142907
28152908
if (YYCURSOR > YYLIMIT) {
2816-
RETURN_TOKEN(END);
2909+
RETURN_END_TOKEN;
28172910
}
28182911
28192912
YYCURSOR--;
@@ -2901,7 +2994,7 @@ nowdoc_scan_done:
29012994
29022995
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
29032996
if (YYCURSOR > YYLIMIT) {
2904-
RETURN_TOKEN(END);
2997+
RETURN_END_TOKEN;
29052998
}
29062999
29073000
RETURN_TOKEN(T_BAD_CHARACTER);

ext/tokenizer/tests/token_get_all_variation17.phpt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ try {
3232
} catch(Exception $e) {
3333
echo "caught exception:";
3434
}
35+
}
3536
?>';
3637
$tokens = token_get_all($source);
3738
var_dump($tokens);
@@ -40,7 +41,7 @@ echo "Done"
4041
?>
4142
--EXPECTF--
4243
*** Testing token_get_all() : with exception keywords ***
43-
array(81) {
44+
array(83) {
4445
[0]=>
4546
array(3) {
4647
[0]=>
@@ -601,13 +602,25 @@ array(81) {
601602
int(14)
602603
}
603604
[80]=>
605+
string(1) "}"
606+
[81]=>
604607
array(3) {
605608
[0]=>
606609
int(%d)
607610
[1]=>
608-
string(2) "?>"
611+
string(1) "
612+
"
609613
[2]=>
610614
int(15)
611615
}
616+
[82]=>
617+
array(3) {
618+
[0]=>
619+
int(%d)
620+
[1]=>
621+
string(2) "?>"
622+
[2]=>
623+
int(16)
624+
}
612625
}
613626
Done

ext/tokenizer/tokenizer.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
392392
array_init(return_value);
393393

394394
while ((token_type = lex_scan(&token, NULL))) {
395+
ZEND_ASSERT(token_type != T_ERROR);
396+
395397
add_token(
396398
return_value, token_type, zendtext, zendleng, token_line,
397399
token_class, &interned_strings);
@@ -408,7 +410,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
408410
&& --need_tokens == 0
409411
) {
410412
/* fetch the rest into a T_INLINE_HTML */
411-
if (zendcursor != zendlimit) {
413+
if (zendcursor < zendlimit) {
412414
add_token(
413415
return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor,
414416
token_line, token_class, &interned_strings);

0 commit comments

Comments
 (0)