From 3dcf39cc8a52ecdd023e3eba50538f69d02f0f0d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 5 Jun 2020 16:55:20 +0200 Subject: [PATCH 1/5] Pass keyword yytext through to parser --- Zend/zend_compile.h | 1 + Zend/zend_language_parser.y | 160 +++++++++++++++++---------------- Zend/zend_language_scanner.l | 168 +++++++++++++++++++---------------- 3 files changed, 174 insertions(+), 155 deletions(-) diff --git a/Zend/zend_compile.h b/Zend/zend_compile.h index 9a1e5adaafe1..c46ce43734cc 100644 --- a/Zend/zend_compile.h +++ b/Zend/zend_compile.h @@ -122,6 +122,7 @@ typedef union _zend_parser_stack_elem { zend_string *str; zend_ulong num; unsigned char *ptr; + char *ident; } zend_parser_stack_elem; void zend_compile_top_stmt(zend_ast *ast); diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y index 4fdc05909bf0..80a57514b9fe 100644 --- a/Zend/zend_language_parser.y +++ b/Zend/zend_language_parser.y @@ -96,18 +96,84 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %token T_STRING_VARNAME "variable name (T_STRING_VARNAME)" %token T_NUM_STRING "number (T_NUM_STRING)" +%token T_INCLUDE "include (T_INCLUDE)" +%token T_INCLUDE_ONCE "include_once (T_INCLUDE_ONCE)" +%token T_EVAL "eval (T_EVAL)" +%token T_REQUIRE "require (T_REQUIRE)" +%token T_REQUIRE_ONCE "require_once (T_REQUIRE_ONCE)" +%token T_LOGICAL_OR "or (T_LOGICAL_OR)" +%token T_LOGICAL_XOR "xor (T_LOGICAL_XOR)" +%token T_LOGICAL_AND "and (T_LOGICAL_AND)" +%token T_PRINT "print (T_PRINT)" +%token T_YIELD "yield (T_YIELD)" +%token T_YIELD_FROM "yield from (T_YIELD_FROM)" +%token T_INSTANCEOF "instanceof (T_INSTANCEOF)" +%token T_NEW "new (T_NEW)" +%token T_CLONE "clone (T_CLONE)" +%token T_EXIT "exit (T_EXIT)" +%token T_IF "if (T_IF)" +%token T_ELSEIF "elseif (T_ELSEIF)" +%token T_ELSE "else (T_ELSE)" +%token T_ENDIF "endif (T_ENDIF)" +%token T_ECHO "echo (T_ECHO)" +%token T_DO "do (T_DO)" +%token T_WHILE "while (T_WHILE)" +%token T_ENDWHILE "endwhile (T_ENDWHILE)" +%token T_FOR "for (T_FOR)" +%token T_ENDFOR "endfor (T_ENDFOR)" +%token T_FOREACH "foreach (T_FOREACH)" +%token T_ENDFOREACH "endforeach (T_ENDFOREACH)" +%token T_DECLARE "declare (T_DECLARE)" +%token T_ENDDECLARE "enddeclare (T_ENDDECLARE)" +%token T_AS "as (T_AS)" +%token T_SWITCH "switch (T_SWITCH)" +%token T_ENDSWITCH "endswitch (T_ENDSWITCH)" +%token T_CASE "case (T_CASE)" +%token T_DEFAULT "default (T_DEFAULT)" +%token T_BREAK "break (T_BREAK)" +%token T_CONTINUE "continue (T_CONTINUE)" +%token T_GOTO "goto (T_GOTO)" +%token T_FUNCTION "function (T_FUNCTION)" +%token T_FN "fn (T_FN)" +%token T_CONST "const (T_CONST)" +%token T_RETURN "return (T_RETURN)" +%token T_TRY "try (T_TRY)" +%token T_CATCH "catch (T_CATCH)" +%token T_FINALLY "finally (T_FINALLY)" +%token T_THROW "throw (T_THROW)" +%token T_USE "use (T_USE)" +%token T_INSTEADOF "insteadof (T_INSTEADOF)" +%token T_GLOBAL "global (T_GLOBAL)" +%token T_STATIC "static (T_STATIC)" +%token T_ABSTRACT "abstract (T_ABSTRACT)" +%token T_FINAL "final (T_FINAL)" +%token T_PRIVATE "private (T_PRIVATE)" +%token T_PROTECTED "protected (T_PROTECTED)" +%token T_PUBLIC "public (T_PUBLIC)" +%token T_VAR "var (T_VAR)" +%token T_UNSET "unset (T_UNSET)" +%token T_ISSET "isset (T_ISSET)" +%token T_EMPTY "empty (T_EMPTY)" +%token T_HALT_COMPILER "__halt_compiler (T_HALT_COMPILER)" +%token T_CLASS "class (T_CLASS)" +%token T_TRAIT "trait (T_TRAIT)" +%token T_INTERFACE "interface (T_INTERFACE)" +%token T_EXTENDS "extends (T_EXTENDS)" +%token T_IMPLEMENTS "implements (T_IMPLEMENTS)" +%token T_NAMESPACE "namespace (T_NAMESPACE)" +%token T_LIST "list (T_LIST)" +%token T_ARRAY "array (T_ARRAY)" +%token T_CALLABLE "callable (T_CALLABLE)" +%token T_LINE "__LINE__ (T_LINE)" +%token T_FILE "__FILE__ (T_FILE)" +%token T_DIR "__DIR__ (T_DIR)" +%token T_CLASS_C "__CLASS__ (T_CLASS_C)" +%token T_TRAIT_C "__TRAIT__ (T_TRAIT_C)" +%token T_METHOD_C "__METHOD__ (T_METHOD_C)" +%token T_FUNC_C "__FUNCTION__ (T_FUNC_C)" +%token T_NS_C "__NAMESPACE__ (T_NS_C)" + %token END 0 "end of file" -%token T_INCLUDE "include (T_INCLUDE)" -%token T_INCLUDE_ONCE "include_once (T_INCLUDE_ONCE)" -%token T_EVAL "eval (T_EVAL)" -%token T_REQUIRE "require (T_REQUIRE)" -%token T_REQUIRE_ONCE "require_once (T_REQUIRE_ONCE)" -%token T_LOGICAL_OR "or (T_LOGICAL_OR)" -%token T_LOGICAL_XOR "xor (T_LOGICAL_XOR)" -%token T_LOGICAL_AND "and (T_LOGICAL_AND)" -%token T_PRINT "print (T_PRINT)" -%token T_YIELD "yield (T_YIELD)" -%token T_YIELD_FROM "yield from (T_YIELD_FROM)" %token T_PLUS_EQUAL "+= (T_PLUS_EQUAL)" %token T_MINUS_EQUAL "-= (T_MINUS_EQUAL)" %token T_MUL_EQUAL "*= (T_MUL_EQUAL)" @@ -131,7 +197,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %token T_SPACESHIP "<=> (T_SPACESHIP)" %token T_SL "<< (T_SL)" %token T_SR ">> (T_SR)" -%token T_INSTANCEOF "instanceof (T_INSTANCEOF)" %token T_INC "++ (T_INC)" %token T_DEC "-- (T_DEC)" %token T_INT_CAST "(int) (T_INT_CAST)" @@ -141,70 +206,8 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %token T_OBJECT_CAST "(object) (T_OBJECT_CAST)" %token T_BOOL_CAST "(bool) (T_BOOL_CAST)" %token T_UNSET_CAST "(unset) (T_UNSET_CAST)" -%token T_NEW "new (T_NEW)" -%token T_CLONE "clone (T_CLONE)" -%token T_EXIT "exit (T_EXIT)" -%token T_IF "if (T_IF)" -%token T_ELSEIF "elseif (T_ELSEIF)" -%token T_ELSE "else (T_ELSE)" -%token T_ENDIF "endif (T_ENDIF)" -%token T_ECHO "echo (T_ECHO)" -%token T_DO "do (T_DO)" -%token T_WHILE "while (T_WHILE)" -%token T_ENDWHILE "endwhile (T_ENDWHILE)" -%token T_FOR "for (T_FOR)" -%token T_ENDFOR "endfor (T_ENDFOR)" -%token T_FOREACH "foreach (T_FOREACH)" -%token T_ENDFOREACH "endforeach (T_ENDFOREACH)" -%token T_DECLARE "declare (T_DECLARE)" -%token T_ENDDECLARE "enddeclare (T_ENDDECLARE)" -%token T_AS "as (T_AS)" -%token T_SWITCH "switch (T_SWITCH)" -%token T_ENDSWITCH "endswitch (T_ENDSWITCH)" -%token T_CASE "case (T_CASE)" -%token T_DEFAULT "default (T_DEFAULT)" -%token T_BREAK "break (T_BREAK)" -%token T_CONTINUE "continue (T_CONTINUE)" -%token T_GOTO "goto (T_GOTO)" -%token T_FUNCTION "function (T_FUNCTION)" -%token T_FN "fn (T_FN)" -%token T_CONST "const (T_CONST)" -%token T_RETURN "return (T_RETURN)" -%token T_TRY "try (T_TRY)" -%token T_CATCH "catch (T_CATCH)" -%token T_FINALLY "finally (T_FINALLY)" -%token T_THROW "throw (T_THROW)" -%token T_USE "use (T_USE)" -%token T_INSTEADOF "insteadof (T_INSTEADOF)" -%token T_GLOBAL "global (T_GLOBAL)" -%token T_STATIC "static (T_STATIC)" -%token T_ABSTRACT "abstract (T_ABSTRACT)" -%token T_FINAL "final (T_FINAL)" -%token T_PRIVATE "private (T_PRIVATE)" -%token T_PROTECTED "protected (T_PROTECTED)" -%token T_PUBLIC "public (T_PUBLIC)" -%token T_VAR "var (T_VAR)" -%token T_UNSET "unset (T_UNSET)" -%token T_ISSET "isset (T_ISSET)" -%token T_EMPTY "empty (T_EMPTY)" -%token T_HALT_COMPILER "__halt_compiler (T_HALT_COMPILER)" -%token T_CLASS "class (T_CLASS)" -%token T_TRAIT "trait (T_TRAIT)" -%token T_INTERFACE "interface (T_INTERFACE)" -%token T_EXTENDS "extends (T_EXTENDS)" -%token T_IMPLEMENTS "implements (T_IMPLEMENTS)" %token T_OBJECT_OPERATOR "-> (T_OBJECT_OPERATOR)" %token T_DOUBLE_ARROW "=> (T_DOUBLE_ARROW)" -%token T_LIST "list (T_LIST)" -%token T_ARRAY "array (T_ARRAY)" -%token T_CALLABLE "callable (T_CALLABLE)" -%token T_LINE "__LINE__ (T_LINE)" -%token T_FILE "__FILE__ (T_FILE)" -%token T_DIR "__DIR__ (T_DIR)" -%token T_CLASS_C "__CLASS__ (T_CLASS_C)" -%token T_TRAIT_C "__TRAIT__ (T_TRAIT_C)" -%token T_METHOD_C "__METHOD__ (T_METHOD_C)" -%token T_FUNC_C "__FUNCTION__ (T_FUNC_C)" %token T_COMMENT "comment (T_COMMENT)" %token T_DOC_COMMENT "doc comment (T_DOC_COMMENT)" %token T_OPEN_TAG "open tag (T_OPEN_TAG)" @@ -216,8 +219,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %token T_DOLLAR_OPEN_CURLY_BRACES "${ (T_DOLLAR_OPEN_CURLY_BRACES)" %token T_CURLY_OPEN "{$ (T_CURLY_OPEN)" %token T_PAAMAYIM_NEKUDOTAYIM ":: (T_PAAMAYIM_NEKUDOTAYIM)" -%token T_NAMESPACE "namespace (T_NAMESPACE)" -%token T_NS_C "__NAMESPACE__ (T_NS_C)" %token T_NS_SEPARATOR "\\ (T_NS_SEPARATOR)" %token T_ELLIPSIS "... (T_ELLIPSIS)" %token T_COALESCE "?? (T_COALESCE)" @@ -268,6 +269,8 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %type backup_lex_pos %type backup_doc_comment +%type reserved_non_modifiers semi_reserved + %% /* Rules */ start: @@ -293,7 +296,7 @@ identifier: T_STRING { $$ = $1; } | semi_reserved { zval zv; - zend_lex_tstring(&zv); + zend_lex_tstring(&zv, $1); $$ = zend_ast_create_zval(&zv); } ; @@ -847,7 +850,8 @@ trait_alias: trait_method_reference T_AS T_STRING { $$ = zend_ast_create(ZEND_AST_TRAIT_ALIAS, $1, $3); } | trait_method_reference T_AS reserved_non_modifiers - { zval zv; zend_lex_tstring(&zv); $$ = zend_ast_create(ZEND_AST_TRAIT_ALIAS, $1, zend_ast_create_zval(&zv)); } + { zval zv; zend_lex_tstring(&zv, $3); + $$ = zend_ast_create(ZEND_AST_TRAIT_ALIAS, $1, zend_ast_create_zval(&zv)); } | trait_method_reference T_AS member_modifier identifier { $$ = zend_ast_create_ex(ZEND_AST_TRAIT_ALIAS, $3, $1, $4); } | trait_method_reference T_AS member_modifier diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 8a3e88edfc30..d8cc27aab6e9 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -1334,6 +1334,11 @@ static int check_nesting_at_end() goto emit_token_with_str; \ } while (0) +#define RETURN_TOKEN_WITH_IDENT(_token) do { \ + token = _token; \ + goto emit_token_with_ident; \ + } while (0) + #define RETURN_OR_SKIP_TOKEN(_token) do { \ token = _token; \ if (PARSER_MODE()) { \ @@ -1386,169 +1391,169 @@ NEWLINE ("\r"|"\n"|"\r\n") := yyleng = YYCURSOR - SCNG(yy_text); "exit" { - RETURN_TOKEN(T_EXIT); + RETURN_TOKEN_WITH_IDENT(T_EXIT); } "die" { - RETURN_TOKEN(T_EXIT); + RETURN_TOKEN_WITH_IDENT(T_EXIT); } "fn" { - RETURN_TOKEN(T_FN); + RETURN_TOKEN_WITH_IDENT(T_FN); } "function" { - RETURN_TOKEN(T_FUNCTION); + RETURN_TOKEN_WITH_IDENT(T_FUNCTION); } "const" { - RETURN_TOKEN(T_CONST); + RETURN_TOKEN_WITH_IDENT(T_CONST); } "return" { - RETURN_TOKEN(T_RETURN); + RETURN_TOKEN_WITH_IDENT(T_RETURN); } "yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] { yyless(yyleng - 1); HANDLE_NEWLINES(yytext, yyleng); - RETURN_TOKEN(T_YIELD_FROM); + RETURN_TOKEN_WITH_IDENT(T_YIELD_FROM); } "yield" { - RETURN_TOKEN(T_YIELD); + RETURN_TOKEN_WITH_IDENT(T_YIELD); } "try" { - RETURN_TOKEN(T_TRY); + RETURN_TOKEN_WITH_IDENT(T_TRY); } "catch" { - RETURN_TOKEN(T_CATCH); + RETURN_TOKEN_WITH_IDENT(T_CATCH); } "finally" { - RETURN_TOKEN(T_FINALLY); + RETURN_TOKEN_WITH_IDENT(T_FINALLY); } "throw" { - RETURN_TOKEN(T_THROW); + RETURN_TOKEN_WITH_IDENT(T_THROW); } "if" { - RETURN_TOKEN(T_IF); + RETURN_TOKEN_WITH_IDENT(T_IF); } "elseif" { - RETURN_TOKEN(T_ELSEIF); + RETURN_TOKEN_WITH_IDENT(T_ELSEIF); } "endif" { - RETURN_TOKEN(T_ENDIF); + RETURN_TOKEN_WITH_IDENT(T_ENDIF); } "else" { - RETURN_TOKEN(T_ELSE); + RETURN_TOKEN_WITH_IDENT(T_ELSE); } "while" { - RETURN_TOKEN(T_WHILE); + RETURN_TOKEN_WITH_IDENT(T_WHILE); } "endwhile" { - RETURN_TOKEN(T_ENDWHILE); + RETURN_TOKEN_WITH_IDENT(T_ENDWHILE); } "do" { - RETURN_TOKEN(T_DO); + RETURN_TOKEN_WITH_IDENT(T_DO); } "for" { - RETURN_TOKEN(T_FOR); + RETURN_TOKEN_WITH_IDENT(T_FOR); } "endfor" { - RETURN_TOKEN(T_ENDFOR); + RETURN_TOKEN_WITH_IDENT(T_ENDFOR); } "foreach" { - RETURN_TOKEN(T_FOREACH); + RETURN_TOKEN_WITH_IDENT(T_FOREACH); } "endforeach" { - RETURN_TOKEN(T_ENDFOREACH); + RETURN_TOKEN_WITH_IDENT(T_ENDFOREACH); } "declare" { - RETURN_TOKEN(T_DECLARE); + RETURN_TOKEN_WITH_IDENT(T_DECLARE); } "enddeclare" { - RETURN_TOKEN(T_ENDDECLARE); + RETURN_TOKEN_WITH_IDENT(T_ENDDECLARE); } "instanceof" { - RETURN_TOKEN(T_INSTANCEOF); + RETURN_TOKEN_WITH_IDENT(T_INSTANCEOF); } "as" { - RETURN_TOKEN(T_AS); + RETURN_TOKEN_WITH_IDENT(T_AS); } "switch" { - RETURN_TOKEN(T_SWITCH); + RETURN_TOKEN_WITH_IDENT(T_SWITCH); } "endswitch" { - RETURN_TOKEN(T_ENDSWITCH); + RETURN_TOKEN_WITH_IDENT(T_ENDSWITCH); } "case" { - RETURN_TOKEN(T_CASE); + RETURN_TOKEN_WITH_IDENT(T_CASE); } "default" { - RETURN_TOKEN(T_DEFAULT); + RETURN_TOKEN_WITH_IDENT(T_DEFAULT); } "break" { - RETURN_TOKEN(T_BREAK); + RETURN_TOKEN_WITH_IDENT(T_BREAK); } "continue" { - RETURN_TOKEN(T_CONTINUE); + RETURN_TOKEN_WITH_IDENT(T_CONTINUE); } "goto" { - RETURN_TOKEN(T_GOTO); + RETURN_TOKEN_WITH_IDENT(T_GOTO); } "echo" { - RETURN_TOKEN(T_ECHO); + RETURN_TOKEN_WITH_IDENT(T_ECHO); } "print" { - RETURN_TOKEN(T_PRINT); + RETURN_TOKEN_WITH_IDENT(T_PRINT); } "class" { - RETURN_TOKEN(T_CLASS); + RETURN_TOKEN_WITH_IDENT(T_CLASS); } "interface" { - RETURN_TOKEN(T_INTERFACE); + RETURN_TOKEN_WITH_IDENT(T_INTERFACE); } "trait" { - RETURN_TOKEN(T_TRAIT); + RETURN_TOKEN_WITH_IDENT(T_TRAIT); } "extends" { - RETURN_TOKEN(T_EXTENDS); + RETURN_TOKEN_WITH_IDENT(T_EXTENDS); } "implements" { - RETURN_TOKEN(T_IMPLEMENTS); + RETURN_TOKEN_WITH_IDENT(T_IMPLEMENTS); } "->" { @@ -1592,15 +1597,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } "new" { - RETURN_TOKEN(T_NEW); + RETURN_TOKEN_WITH_IDENT(T_NEW); } "clone" { - RETURN_TOKEN(T_CLONE); + RETURN_TOKEN_WITH_IDENT(T_CLONE); } "var" { - RETURN_TOKEN(T_VAR); + RETURN_TOKEN_WITH_IDENT(T_VAR); } "("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" { @@ -1640,79 +1645,79 @@ NEWLINE ("\r"|"\n"|"\r\n") } "eval" { - RETURN_TOKEN(T_EVAL); + RETURN_TOKEN_WITH_IDENT(T_EVAL); } "include" { - RETURN_TOKEN(T_INCLUDE); + RETURN_TOKEN_WITH_IDENT(T_INCLUDE); } "include_once" { - RETURN_TOKEN(T_INCLUDE_ONCE); + RETURN_TOKEN_WITH_IDENT(T_INCLUDE_ONCE); } "require" { - RETURN_TOKEN(T_REQUIRE); + RETURN_TOKEN_WITH_IDENT(T_REQUIRE); } "require_once" { - RETURN_TOKEN(T_REQUIRE_ONCE); + RETURN_TOKEN_WITH_IDENT(T_REQUIRE_ONCE); } "namespace" { - RETURN_TOKEN(T_NAMESPACE); + RETURN_TOKEN_WITH_IDENT(T_NAMESPACE); } "use" { - RETURN_TOKEN(T_USE); + RETURN_TOKEN_WITH_IDENT(T_USE); } "insteadof" { - RETURN_TOKEN(T_INSTEADOF); + RETURN_TOKEN_WITH_IDENT(T_INSTEADOF); } "global" { - RETURN_TOKEN(T_GLOBAL); + RETURN_TOKEN_WITH_IDENT(T_GLOBAL); } "isset" { - RETURN_TOKEN(T_ISSET); + RETURN_TOKEN_WITH_IDENT(T_ISSET); } "empty" { - RETURN_TOKEN(T_EMPTY); + RETURN_TOKEN_WITH_IDENT(T_EMPTY); } "__halt_compiler" { - RETURN_TOKEN(T_HALT_COMPILER); + RETURN_TOKEN_WITH_IDENT(T_HALT_COMPILER); } "static" { - RETURN_TOKEN(T_STATIC); + RETURN_TOKEN_WITH_IDENT(T_STATIC); } "abstract" { - RETURN_TOKEN(T_ABSTRACT); + RETURN_TOKEN_WITH_IDENT(T_ABSTRACT); } "final" { - RETURN_TOKEN(T_FINAL); + RETURN_TOKEN_WITH_IDENT(T_FINAL); } "private" { - RETURN_TOKEN(T_PRIVATE); + RETURN_TOKEN_WITH_IDENT(T_PRIVATE); } "protected" { - RETURN_TOKEN(T_PROTECTED); + RETURN_TOKEN_WITH_IDENT(T_PROTECTED); } "public" { - RETURN_TOKEN(T_PUBLIC); + RETURN_TOKEN_WITH_IDENT(T_PUBLIC); } "unset" { - RETURN_TOKEN(T_UNSET); + RETURN_TOKEN_WITH_IDENT(T_UNSET); } "=>" { @@ -1720,15 +1725,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } "list" { - RETURN_TOKEN(T_LIST); + RETURN_TOKEN_WITH_IDENT(T_LIST); } "array" { - RETURN_TOKEN(T_ARRAY); + RETURN_TOKEN_WITH_IDENT(T_ARRAY); } "callable" { - RETURN_TOKEN(T_CALLABLE); + RETURN_TOKEN_WITH_IDENT(T_CALLABLE); } "++" { @@ -1832,15 +1837,15 @@ NEWLINE ("\r"|"\n"|"\r\n") } "OR" { - RETURN_TOKEN(T_LOGICAL_OR); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_OR); } "AND" { - RETURN_TOKEN(T_LOGICAL_AND); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_AND); } "XOR" { - RETURN_TOKEN(T_LOGICAL_XOR); + RETURN_TOKEN_WITH_IDENT(T_LOGICAL_XOR); } "<<" { @@ -2096,35 +2101,35 @@ string: } "__CLASS__" { - RETURN_TOKEN(T_CLASS_C); + RETURN_TOKEN_WITH_IDENT(T_CLASS_C); } "__TRAIT__" { - RETURN_TOKEN(T_TRAIT_C); + RETURN_TOKEN_WITH_IDENT(T_TRAIT_C); } "__FUNCTION__" { - RETURN_TOKEN(T_FUNC_C); + RETURN_TOKEN_WITH_IDENT(T_FUNC_C); } "__METHOD__" { - RETURN_TOKEN(T_METHOD_C); + RETURN_TOKEN_WITH_IDENT(T_METHOD_C); } "__LINE__" { - RETURN_TOKEN(T_LINE); + RETURN_TOKEN_WITH_IDENT(T_LINE); } "__FILE__" { - RETURN_TOKEN(T_FILE); + RETURN_TOKEN_WITH_IDENT(T_FILE); } "__DIR__" { - RETURN_TOKEN(T_DIR); + RETURN_TOKEN_WITH_IDENT(T_DIR); } "__NAMESPACE__" { - RETURN_TOKEN(T_NS_C); + RETURN_TOKEN_WITH_IDENT(T_NS_C); } "#!" .* {NEWLINE} { @@ -3017,6 +3022,15 @@ emit_token: } return token; +emit_token_with_ident: + if (PARSER_MODE()) { + elem->ident = yytext; + } + if (SCNG(on_event)) { + SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); + } + return token; + return_whitespace: HANDLE_NEWLINES(yytext, yyleng); if (SCNG(on_event)) { From 87ad9c0e1121fca6a28aa825a9b9dca05197cc50 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 5 Jun 2020 17:12:14 +0200 Subject: [PATCH 2/5] Use passed ident in lex_tstring --- Zend/zend_compile.h | 7 ++++++- Zend/zend_language_scanner.h | 2 +- Zend/zend_language_scanner.l | 8 +++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Zend/zend_compile.h b/Zend/zend_compile.h index c46ce43734cc..3fe983e6a6f5 100644 --- a/Zend/zend_compile.h +++ b/Zend/zend_compile.h @@ -117,12 +117,17 @@ typedef struct _zend_file_context { HashTable seen_symbols; } zend_file_context; +typedef struct { + uint32_t offset; + uint32_t len; +} zend_lexer_ident_ref; + typedef union _zend_parser_stack_elem { zend_ast *ast; zend_string *str; zend_ulong num; unsigned char *ptr; - char *ident; + zend_lexer_ident_ref ident; } zend_parser_stack_elem; void zend_compile_top_stmt(zend_ast *ast); diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index 35eccaf7e631..9f0e7734c862 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -76,7 +76,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state); ZEND_API int zend_prepare_string_for_scanning(zval *str, const char *filename); ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding); ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding); -ZEND_API void zend_lex_tstring(zval *zv); +ZEND_API void zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref); END_EXTERN_C() diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index d8cc27aab6e9..90c8c3a23b46 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -306,13 +306,14 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle) } } -ZEND_API void zend_lex_tstring(zval *zv) +ZEND_API void zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref) { + char *ident = (char *) SCNG(yy_start) + ident_ref.offset; if (SCNG(on_event)) { SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context)); } - ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng)); + ZVAL_STRINGL(zv, ident, ident_ref.len); } #define BOM_UTF32_BE "\x00\x00\xfe\xff" @@ -3024,7 +3025,8 @@ emit_token: emit_token_with_ident: if (PARSER_MODE()) { - elem->ident = yytext; + elem->ident.offset = SCNG(yy_text) - SCNG(yy_start); + elem->ident.len = SCNG(yy_leng); } if (SCNG(on_event)) { SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); From 694442f87cf1d6e9fe42bf8b00d6c7e961aa208c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 5 Jun 2020 17:14:52 +0200 Subject: [PATCH 3/5] Add test --- Zend/tests/bug77966.phpt | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 Zend/tests/bug77966.phpt diff --git a/Zend/tests/bug77966.phpt b/Zend/tests/bug77966.phpt new file mode 100644 index 000000000000..1e859be8e1b1 --- /dev/null +++ b/Zend/tests/bug77966.phpt @@ -0,0 +1,25 @@ +--TEST-- +Bug #77966: Cannot alias a method named "namespace" +--FILE-- +bar(); +$c->namespace(); + +?> +--EXPECT-- +Called +Called From 26ff4ed492e959c33d428756e19762a01f3d5308 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 5 Jun 2020 18:08:42 +0200 Subject: [PATCH 4/5] Add broken tokenizer test And fix handling of on_event in object mode --- ext/tokenizer/tests/bug77966.phpt | 49 +++++++++++++++++++++++++++++++ ext/tokenizer/tokenizer.c | 16 +++++----- 2 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 ext/tokenizer/tests/bug77966.phpt diff --git a/ext/tokenizer/tests/bug77966.phpt b/ext/tokenizer/tests/bug77966.phpt new file mode 100644 index 000000000000..3bbfe4b82e79 --- /dev/null +++ b/ext/tokenizer/tests/bug77966.phpt @@ -0,0 +1,49 @@ +--TEST-- +Handling of "namespace as" in TOKEN_PARSE mode +--FILE-- +getTokenName()}: \"$token->text\"\n"; +} + +?> +--EXPECT-- +T_OPEN_TAG: "tokens, token, LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->token_class, NULL); break; - case ON_FEEDBACK: - tokens_ht = Z_ARRVAL_P(ctx->tokens); - token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1); + case ON_FEEDBACK: { + HashTable *tokens_ht = Z_ARRVAL_P(ctx->tokens); + zval *token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1); + zval *id_zv; ZEND_ASSERT(token_zv); if (Z_TYPE_P(token_zv) == IS_ARRAY) { - ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token); + id_zv = zend_hash_index_find(Z_ARRVAL_P(token_zv), 0); } else { - zend_update_property_long(php_token_ce, token_zv, "type", sizeof("type")-1, token); + ZEND_ASSERT(Z_TYPE_P(token_zv) == IS_OBJECT); + id_zv = OBJ_PROP_NUM(Z_OBJ_P(token_zv), 0); } + ZVAL_LONG(id_zv, token); break; + } case ON_STOP: if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) { add_token(ctx->tokens, T_INLINE_HTML, LANG_SCNG(yy_cursor), From e5ff4ebfef0ed97bfe881e3a964ae91b7509c9f3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 5 Jun 2020 18:28:00 +0200 Subject: [PATCH 5/5] Correctly handle feedback that goes multiple tokens back --- Zend/zend_compile.c | 2 +- Zend/zend_globals.h | 4 ++- Zend/zend_language_scanner.h | 4 ++- Zend/zend_language_scanner.l | 13 +++++---- ext/tokenizer/tests/bug77966.phpt | 4 +-- ext/tokenizer/tokenizer.c | 47 +++++++++++++++++++++++-------- 6 files changed, 51 insertions(+), 23 deletions(-) diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 83c2573d791d..e78e5c4339f2 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -653,7 +653,7 @@ static int zend_add_const_name_literal(zend_string *name, zend_bool unqualified) void zend_stop_lexing(void) { if (LANG_SCNG(on_event)) { - LANG_SCNG(on_event)(ON_STOP, END, 0, LANG_SCNG(on_event_context)); + LANG_SCNG(on_event)(ON_STOP, END, 0, NULL, 0, LANG_SCNG(on_event_context)); } LANG_SCNG(yy_cursor) = LANG_SCNG(yy_limit); diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 1d64d001f646..60d32c0335e3 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -311,7 +311,9 @@ struct _zend_php_scanner_globals { int scanned_string_len; /* hooks */ - void (*on_event)(zend_php_scanner_event event, int token, int line, void *context); + void (*on_event)( + zend_php_scanner_event event, int token, int line, + const char *text, size_t length, void *context); void *on_event_context; }; diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index 9f0e7734c862..35d4d0269e55 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -50,7 +50,9 @@ typedef struct _zend_lex_state { const zend_encoding *script_encoding; /* hooks */ - void (*on_event)(zend_php_scanner_event event, int token, int line, void *context); + void (*on_event)( + zend_php_scanner_event event, int token, int line, + const char *text, size_t length, void *context); void *on_event_context; zend_ast *ast; diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 90c8c3a23b46..4aa024a69adc 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -309,11 +309,12 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle) ZEND_API void zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref) { char *ident = (char *) SCNG(yy_start) + ident_ref.offset; + size_t length = ident_ref.len; if (SCNG(on_event)) { - SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context)); + SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, ident, length, SCNG(on_event_context)); } - ZVAL_STRINGL(zv, ident, ident_ref.len); + ZVAL_STRINGL(zv, ident, length); } #define BOM_UTF32_BE "\x00\x00\xfe\xff" @@ -3019,7 +3020,7 @@ emit_token_with_val: emit_token: if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); } return token; @@ -3029,14 +3030,14 @@ emit_token_with_ident: elem->ident.len = SCNG(yy_leng); } if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); } return token; return_whitespace: HANDLE_NEWLINES(yytext, yyleng); if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, yytext, yyleng, SCNG(on_event_context)); } if (PARSER_MODE()) { start_line = CG(zend_lineno); @@ -3047,7 +3048,7 @@ return_whitespace: skip_token: if (SCNG(on_event)) { - SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context)); + SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context)); } start_line = CG(zend_lineno); goto restart; diff --git a/ext/tokenizer/tests/bug77966.phpt b/ext/tokenizer/tests/bug77966.phpt index 3bbfe4b82e79..142cc7c9aba6 100644 --- a/ext/tokenizer/tests/bug77966.phpt +++ b/ext/tokenizer/tests/bug77966.phpt @@ -35,9 +35,9 @@ T_WHITESPACE: " " {: "{" T_WHITESPACE: " " -T_NAMESPACE: "namespace" +T_STRING: "namespace" T_WHITESPACE: " " -T_STRING: "as" +T_AS: "as" T_WHITESPACE: " " T_STRING: "bar" ;: ";" diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index c0a2bae9178b..901e609d2d86 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -420,7 +420,31 @@ struct event_context { zend_class_entry *token_class; }; -void on_event(zend_php_scanner_event event, int token, int line, void *context) +static zval *extract_token_id_to_replace(zval *token_zv, const char *text, size_t length) { + zval *id_zv, *text_zv; + ZEND_ASSERT(token_zv); + if (Z_TYPE_P(token_zv) == IS_ARRAY) { + id_zv = zend_hash_index_find(Z_ARRVAL_P(token_zv), 0); + text_zv = zend_hash_index_find(Z_ARRVAL_P(token_zv), 1); + } else if (Z_TYPE_P(token_zv) == IS_OBJECT) { + id_zv = OBJ_PROP_NUM(Z_OBJ_P(token_zv), 0); + text_zv = OBJ_PROP_NUM(Z_OBJ_P(token_zv), 1); + } else { + return NULL; + } + + /* There are multiple candidate tokens to which this feedback may apply, + * check text to make sure this is the right one. */ + ZEND_ASSERT(Z_TYPE_P(text_zv) == IS_STRING); + if (Z_STRLEN_P(text_zv) == length && !memcmp(Z_STRVAL_P(text_zv), text, length)) { + return id_zv; + } + return NULL; +} + +void on_event( + zend_php_scanner_event event, int token, int line, + const char *text, size_t length, void *context) { struct event_context *ctx = context; @@ -433,20 +457,19 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context) } else if (token == T_ECHO && LANG_SCNG(yy_leng) == sizeof("tokens, token, - LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->token_class, NULL); + add_token( + ctx->tokens, token, (unsigned char *) text, length, line, ctx->token_class, NULL); break; case ON_FEEDBACK: { HashTable *tokens_ht = Z_ARRVAL_P(ctx->tokens); - zval *token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1); - zval *id_zv; - ZEND_ASSERT(token_zv); - if (Z_TYPE_P(token_zv) == IS_ARRAY) { - id_zv = zend_hash_index_find(Z_ARRVAL_P(token_zv), 0); - } else { - ZEND_ASSERT(Z_TYPE_P(token_zv) == IS_OBJECT); - id_zv = OBJ_PROP_NUM(Z_OBJ_P(token_zv), 0); - } + zval *token_zv, *id_zv = NULL; + ZEND_HASH_REVERSE_FOREACH_VAL(tokens_ht, token_zv) { + id_zv = extract_token_id_to_replace(token_zv, text, length); + if (id_zv) { + break; + } + } ZEND_HASH_FOREACH_END(); + ZEND_ASSERT(id_zv); ZVAL_LONG(id_zv, token); break; }