Skip to content

Syntax errors caused by unclosed {, [, ( mention specific location #5364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Zend/tests/bug60099.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ namespace foo {

?>
--EXPECTF--
Parse error: syntax error, unexpected end of file in %s on line %d
Parse error: Unclosed '{' on line 2 in %s on line %d
2 changes: 1 addition & 1 deletion Zend/tests/eval_parse_error_with_doc_comment.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ EOC

?>
--EXPECTF--
Parse error: syntax error, unexpected end of file in %s(%d) : eval()'d code on line %d
Parse error: Unclosed '{' in %s(%d) : eval()'d code on line %d
4 changes: 2 additions & 2 deletions Zend/tests/require_parse_exception.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ var_dump("\u{ffffff}");');
?>
--EXPECT--
Deprecated: Directive 'allow_url_include' is deprecated in Unknown on line 0
syntax error, unexpected end of file on line 2
syntax error, unexpected end of file on line 3
Unclosed '{' on line 2
Unclosed '{' on line 3
syntax error, unexpected end of file, expecting '(' on line 2
Invalid numeric literal on line 2
Invalid UTF-8 codepoint escape sequence on line 2
Expand Down
1 change: 1 addition & 0 deletions Zend/zend_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ struct _zend_php_scanner_globals {
int yy_state;
zend_stack state_stack;
zend_ptr_stack heredoc_label_stack;
zend_stack nest_location_stack; /* for syntax error reporting */
zend_bool heredoc_scan_ahead;
int heredoc_indentation;
zend_bool heredoc_indentation_uses_spaces;
Expand Down
7 changes: 7 additions & 0 deletions Zend/zend_language_scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ typedef struct _zend_lex_state {
int yy_state;
zend_stack state_stack;
zend_ptr_stack heredoc_label_stack;
zend_stack nest_location_stack; /* for syntax error reporting */

zend_file_handle *in;
uint32_t lineno;
Expand Down Expand Up @@ -63,6 +64,12 @@ typedef struct _zend_heredoc_label {
zend_bool indentation_uses_spaces;
} zend_heredoc_label;

/* Track locations of unclosed {, [, (, etc. for better syntax error reporting */
typedef struct _zend_nest_location {
char text;
int lineno;
} zend_nest_location;

BEGIN_EXTERN_C()
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state);
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);
Expand Down
113 changes: 103 additions & 10 deletions Zend/zend_language_scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ void startup_scanner(void)
CG(doc_comment) = NULL;
CG(extra_fn_flags) = 0;
zend_stack_init(&SCNG(state_stack), sizeof(int));
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));
zend_ptr_stack_init(&SCNG(heredoc_label_stack));
SCNG(heredoc_scan_ahead) = 0;
}
Expand All @@ -205,6 +206,7 @@ void shutdown_scanner(void)
CG(parse_error) = 0;
RESET_DOC_COMMENT();
zend_stack_destroy(&SCNG(state_stack));
zend_stack_destroy(&SCNG(nest_location_stack));
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_scan_ahead) = 0;
Expand All @@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
lex_state->state_stack = SCNG(state_stack);
zend_stack_init(&SCNG(state_stack), sizeof(int));

lex_state->nest_location_stack = SCNG(nest_location_stack);
zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location));

lex_state->heredoc_label_stack = SCNG(heredoc_label_stack);
zend_ptr_stack_init(&SCNG(heredoc_label_stack));

Expand Down Expand Up @@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
zend_stack_destroy(&SCNG(state_stack));
SCNG(state_stack) = lex_state->state_stack;

zend_stack_destroy(&SCNG(nest_location_stack));
SCNG(nest_location_stack) = lex_state->nest_location_stack;

zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack;
Expand Down Expand Up @@ -1250,6 +1258,63 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label);
}

/* Check that { }, [ ], ( ) are nested correctly */
static void report_bad_nesting(char opening, int opening_lineno, char closing)
{
char buf[256];
size_t used = 0;

used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening);

if (opening_lineno != CG(zend_lineno)) {
used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno);
}

if (closing) { /* 'closing' will be 0 if at end of file */
used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

used += is not needed, but I still do it in case someone comes and adds more code below. Don't want any smashed stacks.

}

zend_throw_exception(zend_ce_parse_error, buf, 0);
}

static void enter_nesting(char opening)
{
zend_nest_location nest_loc = {opening, CG(zend_lineno)};
zend_stack_push(&SCNG(nest_location_stack), &nest_loc);
}

static int exit_nesting(char closing)
{
if (zend_stack_is_empty(&SCNG(nest_location_stack))) {
zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing);
return -1;
}

zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
char opening = nest_loc->text;

if ((opening == '{' && closing != '}') ||
(opening == '[' && closing != ']') ||
(opening == '(' && closing != ')')) {
report_bad_nesting(opening, nest_loc->lineno, closing);
return -1;
}

zend_stack_del_top(&SCNG(nest_location_stack));
return 0;
}

static int check_nesting_at_end()
{
if (!zend_stack_is_empty(&SCNG(nest_location_stack))) {
zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack));
report_bad_nesting(nest_loc->text, nest_loc->lineno, 0);
return -1;
}

return 0;
}

#define PARSER_MODE() \
EXPECTED(elem != NULL)

Expand Down Expand Up @@ -1277,6 +1342,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label)
goto emit_token; \
} while (0)

#define RETURN_EXIT_NESTING_TOKEN(_token) do { \
if (exit_nesting(_token) && PARSER_MODE()) { \
RETURN_TOKEN(T_ERROR); \
} else { \
RETURN_TOKEN(_token); \
} \
} while(0)

#define RETURN_END_TOKEN do { \
if (check_nesting_at_end() && PARSER_MODE()) { \
RETURN_TOKEN(T_ERROR); \
} else { \
RETURN_TOKEN(END); \
} \
} while (0)

int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem)
{
int token;
Expand All @@ -1297,7 +1378,7 @@ BNUM "0b"[01]+(_[01]+)*
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
TOKENS [;:,.|^&+-/*=%!~$<>?@]
ANY_CHAR [^]
NEWLINE ("\r"|"\n"|"\r\n")

Expand Down Expand Up @@ -1770,29 +1851,40 @@ NEWLINE ("\r"|"\n"|"\r\n")
RETURN_TOKEN(T_SR);
}

<ST_IN_SCRIPTING>"]"|")" {
/* Check that ] and ) match up properly with a preceding [ or ( */
RETURN_EXIT_NESTING_TOKEN(yytext[0]);
}

<ST_IN_SCRIPTING>"["|"(" {
enter_nesting(yytext[0]);
RETURN_TOKEN(yytext[0]);
}

<ST_IN_SCRIPTING>{TOKENS} {
RETURN_TOKEN(yytext[0]);
}


<ST_IN_SCRIPTING>"{" {
yy_push_state(ST_IN_SCRIPTING);
enter_nesting('{');
RETURN_TOKEN('{');
}


<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
enter_nesting('{');
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}


<ST_IN_SCRIPTING>"}" {
RESET_DOC_COMMENT();
if (!zend_stack_is_empty(&SCNG(state_stack))) {
yy_pop_state();
}
RETURN_TOKEN('}');
RETURN_EXIT_NESTING_TOKEN('}');
}


Expand Down Expand Up @@ -2088,7 +2180,7 @@ string:

<INITIAL>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
RETURN_TOKEN(END);
RETURN_END_TOKEN;
}

inline_char_handler:
Expand Down Expand Up @@ -2165,7 +2257,7 @@ inline_char_handler:
RETURN_TOKEN(']');
}

<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
<ST_VAR_OFFSET>{TOKENS}|[[(){}"`] {
/* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */
RETURN_TOKEN(yytext[0]);
}
Expand Down Expand Up @@ -2569,6 +2661,7 @@ skip_escape_conversion:
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"{$" {
yy_push_state(ST_IN_SCRIPTING);
yyless(1);
enter_nesting('{');
RETURN_TOKEN(T_CURLY_OPEN);
}

Expand All @@ -2593,7 +2686,7 @@ skip_escape_conversion:
}

if (YYCURSOR > YYLIMIT) {
RETURN_TOKEN(END);
RETURN_END_TOKEN;
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
Expand Down Expand Up @@ -2640,7 +2733,7 @@ double_quotes_scan_done:

<ST_BACKQUOTE>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
RETURN_TOKEN(END);
RETURN_END_TOKEN;
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
Expand Down Expand Up @@ -2689,7 +2782,7 @@ double_quotes_scan_done:
int newline = 0, indentation = 0, spacing = 0;

if (YYCURSOR > YYLIMIT) {
RETURN_TOKEN(END);
RETURN_END_TOKEN;
}

YYCURSOR--;
Expand Down Expand Up @@ -2813,7 +2906,7 @@ heredoc_scan_done:
int newline = 0, indentation = 0, spacing = -1;

if (YYCURSOR > YYLIMIT) {
RETURN_TOKEN(END);
RETURN_END_TOKEN;
}

YYCURSOR--;
Expand Down Expand Up @@ -2901,7 +2994,7 @@ nowdoc_scan_done:

<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
RETURN_TOKEN(END);
RETURN_END_TOKEN;
}

RETURN_TOKEN(T_BAD_CHARACTER);
Expand Down
17 changes: 15 additions & 2 deletions ext/tokenizer/tests/token_get_all_variation17.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ try {
} catch(Exception $e) {
echo "caught exception:";
}
}
?>';
$tokens = token_get_all($source);
var_dump($tokens);
Expand All @@ -40,7 +41,7 @@ echo "Done"
?>
--EXPECTF--
*** Testing token_get_all() : with exception keywords ***
array(81) {
array(83) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test had bad syntax before in the code passed to token_get_all (it was missing a curly bracket). Fixed that up.

[0]=>
array(3) {
[0]=>
Expand Down Expand Up @@ -601,13 +602,25 @@ array(81) {
int(14)
}
[80]=>
string(1) "}"
[81]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(2) "?>"
string(1) "
"
[2]=>
int(15)
}
[82]=>
array(3) {
[0]=>
int(%d)
[1]=>
string(2) "?>"
[2]=>
int(16)
}
}
Done
4 changes: 3 additions & 1 deletion ext/tokenizer/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
array_init(return_value);

while ((token_type = lex_scan(&token, NULL))) {
ZEND_ASSERT(token_type != T_ERROR);

add_token(
return_value, token_type, zendtext, zendleng, token_line,
token_class, &interned_strings);
Expand All @@ -408,7 +410,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
&& --need_tokens == 0
) {
/* fetch the rest into a T_INLINE_HTML */
if (zendcursor != zendlimit) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This breaks if it receives a T_ERROR token at end of file (which was not possible before). I'm not sure why, but in that case, zendcursor is actually more than zendlimit. Changing the test to zendcursor < zendlimit makes it a bit more robust.

The added guard clause above actually takes care of this problem, but I figure changing the test to zendcursor < zendlimit is still a good idea.

if (zendcursor < zendlimit) {
add_token(
return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor,
token_line, token_class, &interned_strings);
Expand Down
Loading