From caa917516d03d78581b943e3ec183ade9889df0b Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 11 Mar 2023 00:48:50 +0100 Subject: [PATCH 1/4] Implement compile-time-eval for functions called with named arguments In GH-10811 I disabled compile-time-eval for function calls with named arguments because it was completely unsupported and thus would therefore crash or give semantically wrong results. This patch undoes the disabling and implements the necessary components to support CTE for named arguments. Since named arguments are a bit of a special case for the VM, I also had to add some special handling for the SCCP pass. I also had to tweak how the call removal code worked such that the named arguments, which are not in num_args, are also removed, as well as the CHECK_UNDEF_ARGS opline. --- Zend/Optimizer/sccp.c | 112 +++++++++++++++++++++++---- Zend/Optimizer/zend_call_graph.c | 4 +- Zend/Optimizer/zend_call_graph.h | 2 +- ext/opcache/tests/opt/sccp_042.phpt | 114 ++++++++++++++++++++++++++++ 4 files changed, 217 insertions(+), 15 deletions(-) create mode 100644 ext/opcache/tests/opt/sccp_042.phpt diff --git a/Zend/Optimizer/sccp.c b/Zend/Optimizer/sccp.c index f6144f87b4fac..f902aee8f53b9 100644 --- a/Zend/Optimizer/sccp.c +++ b/Zend/Optimizer/sccp.c @@ -83,6 +83,11 @@ typedef struct _sccp_ctx { zval bot; } sccp_ctx; +typedef struct _named_arg_pair { + zval *name; + zval *value; +} named_arg_pair; + #define TOP ((uint8_t)-1) #define BOT ((uint8_t)-2) #define PARTIAL_ARRAY ((uint8_t)-3) @@ -744,7 +749,7 @@ static inline zend_result ct_eval_array_key_exists(zval *result, zval *op1, zval return SUCCESS; } -static bool can_ct_eval_func_call(zend_function *func, zend_string *name, uint32_t num_args, zval **args) { +static bool can_ct_eval_func_call(zend_function *func, zend_string *name, uint32_t num_args, zval **args, uint32_t num_named_args) { /* Precondition: func->type == ZEND_INTERNAL_FUNCTION, this is a global function */ /* Functions setting ZEND_ACC_COMPILE_TIME_EVAL (@compile-time-eval) must always produce the same result for the same arguments, * and have no dependence on global state (such as locales). It is okay if they throw @@ -753,6 +758,13 @@ static bool can_ct_eval_func_call(zend_function *func, zend_string *name, uint32 /* This has @compile-time-eval in stub info and uses a macro such as ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE */ return true; } + + /* Has a named argument, but dirname doesn't expect that, and checking the str_repeat case is too complex. + * The complexity is not worth it for one function which will unlikely be used with named parameters. */ + if (num_named_args > 0) { + return false; + } + #ifndef ZEND_WIN32 /* On Windows this function may be code page dependent. */ if (zend_string_equals_literal(name, "dirname")) { @@ -779,7 +791,7 @@ static bool can_ct_eval_func_call(zend_function *func, zend_string *name, uint32 * or just happened to be commonly used with constant operands in WP (need to test other * applications as well, of course). */ static inline zend_result ct_eval_func_call( - zend_op_array *op_array, zval *result, zend_string *name, uint32_t num_args, zval **args) { + zend_op_array *op_array, zval *result, zend_string *name, uint32_t num_args, zval **args, named_arg_pair *named_args, uint32_t num_named_args) { uint32_t i; zend_function *func = zend_hash_find_ptr(CG(function_table), name); if (!func || func->type != ZEND_INTERNAL_FUNCTION) { @@ -791,7 +803,7 @@ static inline zend_result ct_eval_func_call( return SUCCESS; } - if (!can_ct_eval_func_call(func, name, num_args, args)) { + if (!can_ct_eval_func_call(func, name, num_args, args, num_named_args)) { return FAILURE; } @@ -821,12 +833,47 @@ static inline zend_result ct_eval_func_call( ZVAL_COPY(EX_VAR_NUM(i), args[i]); } ZVAL_NULL(result); - func->internal_function.handler(execute_data, result); + + zend_result retval = SUCCESS; + zval *named_args_copies[3] = {NULL}; + ZEND_ASSERT(num_named_args <= sizeof(named_args_copies) / sizeof(named_args_copies[0])); + + for (i = 0; i < num_named_args; i++) { + uint32_t arg_num_unused; + /* Need 2 cache slots for zend_get_arg_offset_by_name() */ + void *cache_slots[2] = {NULL}; + zval *arg = zend_handle_named_arg(&execute_data, Z_STR_P(named_args[i].name), &arg_num_unused, cache_slots); + if (!arg) { + retval = FAILURE; + break; + } + ZVAL_COPY(arg, named_args[i].value); + named_args_copies[i] = arg; + } + + if (retval == SUCCESS) { + /* Handle undef arguments in the same way as how the VM does it */ + if (UNEXPECTED(ZEND_CALL_INFO(execute_data) & ZEND_CALL_MAY_HAVE_UNDEF)) { + /* Have to hackisly set the current EX() back one frame because zend_handle_undef_args() + * temporarily starts its own "fake frame" for execute_data. */ + EG(current_execute_data) = &dummy_frame; + retval = zend_handle_undef_args(execute_data); + EG(current_execute_data) = execute_data; + } + if (retval == SUCCESS) { + func->internal_function.handler(execute_data, result); + } + } + for (i = 0; i < num_args; i++) { zval_ptr_dtor_nogc(EX_VAR_NUM(i)); } + for (i = 0; i < num_named_args; i++) { + if (named_args_copies[i]) { + zval_ptr_dtor_nogc(named_args_copies[i]); + } + } - zend_result retval = SUCCESS; if (EG(exception)) { zval_ptr_dtor(result); zend_clear_exception(); @@ -1631,7 +1678,8 @@ static void sccp_visit_instr(scdf_ctx *scdf, zend_op *opline, zend_ssa_op *ssa_o { zend_call_info *call; zval *name, *args[3] = {NULL}; - int i; + named_arg_pair named_args[3] = {{NULL, NULL}}; + unsigned int i; if (!ctx->call_map) { SET_RESULT_BOT(result); @@ -1646,9 +1694,8 @@ static void sccp_visit_instr(scdf_ctx *scdf, zend_op *opline, zend_ssa_op *ssa_o break; } - /* We're only interested in functions with up to three arguments right now. - * Note that named arguments with the argument in declaration order will still work. */ - if (call->num_args > 3 || call->send_unpack || call->is_prototype || call->named_args) { + /* We're only interested in functions with up to three positional arguments right now. */ + if (call->num_args > 3 || call->send_unpack || call->is_prototype) { SET_RESULT_BOT(result); break; } @@ -1672,12 +1719,44 @@ static void sccp_visit_instr(scdf_ctx *scdf, zend_op *opline, zend_ssa_op *ssa_o } } + i = 0; + if (call->first_named_arg.opline) { + for (zend_op *opline = call->first_named_arg.opline; opline != call->caller_call_opline; opline++, i++) { + if (opline->opcode == ZEND_CHECK_UNDEF_ARGS) { + break; + } + if ((opline->opcode != ZEND_SEND_VAL && opline->opcode != ZEND_SEND_VAR) + /* must have a name, which is a const */ + || opline->op2_type != IS_CONST + /* must not exceed the maximum number of named parameters */ + || i == sizeof(named_args) / sizeof(named_args[0])) { + SET_RESULT_BOT(result); + return; + } + zval *argument_name = get_op2_value(ctx, opline, + &ctx->scdf.ssa->ops[opline - ctx->scdf.op_array->opcodes]); + ZEND_ASSERT(Z_TYPE_P(argument_name) == IS_STRING); + zval *argument_value = get_op1_value(ctx, opline, + &ctx->scdf.ssa->ops[opline - ctx->scdf.op_array->opcodes]); + if (argument_value) { + if (IS_BOT(argument_value) || IS_PARTIAL_ARRAY(argument_value)) { + SET_RESULT_BOT(result); + return; + } else if (IS_TOP(argument_value)) { + return; + } + named_args[i].name = argument_name; + named_args[i].value = argument_value; + } + } + } + /* We didn't get a BOT argument, so value stays the same */ if (!IS_TOP(&ctx->values[ssa_op->result_def])) { break; } - if (ct_eval_func_call(scdf->op_array, &zv, Z_STR_P(name), call->num_args, args) == SUCCESS) { + if (ct_eval_func_call(scdf->op_array, &zv, Z_STR_P(name), call->num_args, args, named_args, i) == SUCCESS) { SET_RESULT(result, &zv); zval_ptr_dtor_nogc(&zv); break; @@ -2023,7 +2102,6 @@ static int remove_call(sccp_ctx *ctx, zend_op *opline, zend_ssa_op *ssa_op) zend_ssa *ssa = ctx->scdf.ssa; zend_op_array *op_array = ctx->scdf.op_array; zend_call_info *call; - int i; ZEND_ASSERT(ctx->call_map); call = ctx->call_map[opline - op_array->opcodes]; @@ -2033,15 +2111,23 @@ static int remove_call(sccp_ctx *ctx, zend_op *opline, zend_ssa_op *ssa_op) zend_ssa_remove_instr(ssa, call->caller_init_opline, &ssa->ops[call->caller_init_opline - op_array->opcodes]); - for (i = 0; i < call->num_args; i++) { + int removed = 2 + call->num_args; + for (int i = 0; i < call->num_args; i++) { zend_ssa_remove_instr(ssa, call->arg_info[i].opline, &ssa->ops[call->arg_info[i].opline - op_array->opcodes]); } + zend_op *named_arg = call->first_named_arg.opline; + if (named_arg) { + for (; named_arg != opline; named_arg++, removed++) { + zend_ssa_remove_instr(ssa, named_arg, + &ssa->ops[named_arg - op_array->opcodes]); + } + } // TODO: remove call_info completely??? call->callee_func = NULL; - return call->num_args + 2; + return removed; } /* This is a basic DCE pass we run after SCCP. It only works on those instructions those result diff --git a/Zend/Optimizer/zend_call_graph.c b/Zend/Optimizer/zend_call_graph.c index c2b7b00cbee13..84ceb09b065ff 100644 --- a/Zend/Optimizer/zend_call_graph.c +++ b/Zend/Optimizer/zend_call_graph.c @@ -125,7 +125,9 @@ ZEND_API void zend_analyze_calls(zend_arena **arena, zend_script *script, uint32 case ZEND_SEND_USER: if (call_info) { if (opline->op2_type == IS_CONST) { - call_info->named_args = 1; + if (!call_info->first_named_arg.opline) { + call_info->first_named_arg.opline = opline; + } break; } diff --git a/Zend/Optimizer/zend_call_graph.h b/Zend/Optimizer/zend_call_graph.h index 3a02425084d58..befdd4d219de3 100644 --- a/Zend/Optimizer/zend_call_graph.h +++ b/Zend/Optimizer/zend_call_graph.h @@ -36,9 +36,9 @@ struct _zend_call_info { zend_call_info *next_callee; bool recursive; bool send_unpack; /* Parameters passed by SEND_UNPACK or SEND_ARRAY */ - bool named_args; /* Function has named arguments */ bool is_prototype; /* An overridden child method may be called */ int num_args; /* Number of arguments, excluding named and variadic arguments */ + zend_send_arg_info first_named_arg; /* First named arg if function has named arguments */ zend_send_arg_info arg_info[1]; }; diff --git a/ext/opcache/tests/opt/sccp_042.phpt b/ext/opcache/tests/opt/sccp_042.phpt new file mode 100644 index 0000000000000..0ba934155257f --- /dev/null +++ b/ext/opcache/tests/opt/sccp_042.phpt @@ -0,0 +1,114 @@ +--TEST-- +SCCP 042: Optimisation for CTE calls with named arguments +--EXTENSIONS-- +opcache +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.optimization_level=0xE0 +opcache.opt_debug_level=0x400000 +--FILE-- + 1], strict: true, filter_value: 0)); +print_r(array_keys(array: [1 => 1], filter_value: 0, strict: true)); +print_r(array_keys(strict: true, filter_value: 1, array: [1 => 1, 2 => 1, 3 => 9])); +print_r(array_keys([1 => 1, 2 => 1, 3 => 9], 1, true)); + +// The first one will already throw a fatal error. +// We can't put try-catch around these because then it won't optimize. +// We use opcache.opt_debug_level to show us the resulting SSA to verify the CTE code did the right thing. +print_r(array_keys([], strict: true)); +print_r(array_keys(array: [], filter_value: 0, array: [1])); +print_r(array_keys(array: [], test: 0, strict: true)); + +// No CTE possible +$generated = mt_rand(0, 10); +print_r(array_keys(array: [$generated], filter_value: $generated, strict: true)); + +?> +--EXPECTF-- +$_main: + ; (lines=53, args=0, vars=1, tmps=19, ssa_vars=12, no_loops) + ; (after dfa pass) + ; %s + ; return [long] RANGE[1..1] + ; #0.CV0($generated) [undef, ref, any] +BB0: + ; start exit lines=[0-52] + ; level=0 +0000 INIT_FCALL 1 96 string("print_r") +0001 SEND_VAL array(...) 1 +0002 DO_ICALL +0003 INIT_FCALL 1 96 string("print_r") +0004 SEND_VAL array(...) 1 +0005 DO_ICALL +0006 INIT_FCALL 1 96 string("print_r") +0007 SEND_VAL array(...) 1 +0008 DO_ICALL +0009 INIT_FCALL 1 96 string("print_r") +0010 SEND_VAL array(...) 1 +0011 DO_ICALL +0012 INIT_FCALL 1 96 string("print_r") +0013 INIT_FCALL 1 96 string("array_keys") +0014 SEND_VAL array(...) 1 +0015 SEND_VAL bool(true) string("strict") +0016 CHECK_UNDEF_ARGS +0017 #5.V9 [array [long] of [long, string]] = DO_ICALL +0018 SEND_VAR #5.V9 [array [long] of [long, string]] 1 +0019 DO_ICALL +0020 INIT_FCALL 1 96 string("print_r") +0021 INIT_FCALL 2 112 string("array_keys") +0022 SEND_VAL array(...) 1 +0023 SEND_VAL int(0) 2 +0024 SEND_VAL array(...) string("array") +0025 CHECK_UNDEF_ARGS +0026 #6.V11 [array [long] of [long, string]] = DO_ICALL +0027 SEND_VAR #6.V11 [array [long] of [long, string]] 1 +0028 DO_ICALL +0029 INIT_FCALL 1 96 string("print_r") +0030 INIT_FCALL 1 96 string("array_keys") +0031 SEND_VAL array(...) 1 +0032 SEND_VAL_EX int(0) string("test") +0033 SEND_VAL bool(true) string("strict") +0034 CHECK_UNDEF_ARGS +0035 #7.V13 [array [long] of [long, string]] = DO_ICALL +0036 SEND_VAR #7.V13 [array [long] of [long, string]] 1 +0037 DO_ICALL +0038 INIT_FCALL 2 112 string("mt_rand") +0039 SEND_VAL int(0) 1 +0040 SEND_VAL int(10) 2 +0041 #8.V15 [long] = DO_ICALL +0042 ASSIGN #0.CV0($generated) [undef, ref, any] -> #9.CV0($generated) [ref, any] #8.V15 [long] +0043 INIT_FCALL 1 96 string("print_r") +0044 INIT_FCALL 3 128 string("array_keys") +0045 #10.T17 [array [long] of [any]] = INIT_ARRAY 1 (packed) #9.CV0($generated) [ref, any] NEXT +0046 SEND_VAL #10.T17 [array [long] of [any]] 1 +0047 SEND_VAR #9.CV0($generated) [ref, any] 2 +0048 SEND_VAL bool(true) 3 +0049 #11.V18 [array [long] of [long, string]] = DO_ICALL +0050 SEND_VAR #11.V18 [array [long] of [long, string]] 1 +0051 DO_ICALL +0052 RETURN int(1) +Array +( +) +Array +( +) +Array +( + [0] => 1 + [1] => 2 +) +Array +( + [0] => 1 + [1] => 2 +) + +Fatal error: Uncaught ArgumentCountError: array_keys(): Argument #2 ($filter_value) must be passed explicitly, because the default value is not known in %s:%d +Stack trace: +#0 %s(%d): array_keys(Array, NULL, true) +#1 {main} + thrown in %s on line %d From de46fda4e7080297214f39c60f26a3aaf4586625 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 11 Mar 2023 00:59:49 +0100 Subject: [PATCH 2/4] Update now outdated, incorrect comments in gh10801.phpt Previously, the CTE did indeed not execute on function calls with named parameters. Now we can do this, so the comments need to be adjusted to not confuse the end user. The goal of the test remains the same though: we use different argument orders to check if there is a semantic difference. --- ext/opcache/tests/opt/gh10801.phpt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/opcache/tests/opt/gh10801.phpt b/ext/opcache/tests/opt/gh10801.phpt index 7260c1c40c11e..1fa007b7f7705 100644 --- a/ext/opcache/tests/opt/gh10801.phpt +++ b/ext/opcache/tests/opt/gh10801.phpt @@ -8,9 +8,9 @@ opcache.optimization_level=0xe0 opcache --FILE-- 1], strict: true, filter_value: 0)); -// Will not use named arguments and do CTE as expected +// Will not use named arguments, and must result in the same output print_r(array_keys(array: [1 => 1], filter_value: 0, strict: true)); ?> --EXPECT-- From 50857d0635c848abf753c894035f35c97c90e6b7 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 12 Mar 2023 12:56:44 +0100 Subject: [PATCH 3/4] Setup frame using push & free to properly set the VM stack top and end --- Zend/Optimizer/sccp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Zend/Optimizer/sccp.c b/Zend/Optimizer/sccp.c index f902aee8f53b9..b38bbf79baec9 100644 --- a/Zend/Optimizer/sccp.c +++ b/Zend/Optimizer/sccp.c @@ -818,8 +818,11 @@ static inline zend_result ct_eval_func_call( dummy_frame.opline = &dummy_opline; dummy_opline.opcode = ZEND_DO_FCALL; - execute_data = safe_emalloc(num_args, sizeof(zval), ZEND_CALL_FRAME_SLOT * sizeof(zval)); - memset(execute_data, 0, sizeof(zend_execute_data)); + execute_data = zend_vm_stack_push_call_frame(ZEND_CALL_TOP_FUNCTION, func, num_args, func->common.scope); + execute_data->return_value = NULL; + execute_data->symbol_table = NULL; + execute_data->run_time_cache = NULL; + execute_data->extra_named_params = NULL; execute_data->prev_execute_data = &dummy_frame; EG(current_execute_data) = execute_data; @@ -886,7 +889,7 @@ static inline zend_result ct_eval_func_call( } EG(capture_warnings_during_sccp) = 0; - efree(execute_data); + zend_vm_stack_free_call_frame(execute_data); EG(current_execute_data) = prev_execute_data; return retval; } From c1abbeed072bc588e27122de456a49eb38c64cd7 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 12 Mar 2023 16:56:35 +0100 Subject: [PATCH 4/4] Use %d to match the estimated stack size since it can differ because of optimisations --- ext/opcache/tests/opt/sccp_042.phpt | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ext/opcache/tests/opt/sccp_042.phpt b/ext/opcache/tests/opt/sccp_042.phpt index 0ba934155257f..b2ff0efb82635 100644 --- a/ext/opcache/tests/opt/sccp_042.phpt +++ b/ext/opcache/tests/opt/sccp_042.phpt @@ -37,28 +37,28 @@ $_main: BB0: ; start exit lines=[0-52] ; level=0 -0000 INIT_FCALL 1 96 string("print_r") +0000 INIT_FCALL 1 %d string("print_r") 0001 SEND_VAL array(...) 1 0002 DO_ICALL -0003 INIT_FCALL 1 96 string("print_r") +0003 INIT_FCALL 1 %d string("print_r") 0004 SEND_VAL array(...) 1 0005 DO_ICALL -0006 INIT_FCALL 1 96 string("print_r") +0006 INIT_FCALL 1 %d string("print_r") 0007 SEND_VAL array(...) 1 0008 DO_ICALL -0009 INIT_FCALL 1 96 string("print_r") +0009 INIT_FCALL 1 %d string("print_r") 0010 SEND_VAL array(...) 1 0011 DO_ICALL -0012 INIT_FCALL 1 96 string("print_r") -0013 INIT_FCALL 1 96 string("array_keys") +0012 INIT_FCALL 1 %d string("print_r") +0013 INIT_FCALL 1 %d string("array_keys") 0014 SEND_VAL array(...) 1 0015 SEND_VAL bool(true) string("strict") 0016 CHECK_UNDEF_ARGS 0017 #5.V9 [array [long] of [long, string]] = DO_ICALL 0018 SEND_VAR #5.V9 [array [long] of [long, string]] 1 0019 DO_ICALL -0020 INIT_FCALL 1 96 string("print_r") -0021 INIT_FCALL 2 112 string("array_keys") +0020 INIT_FCALL 1 %d string("print_r") +0021 INIT_FCALL 2 %d string("array_keys") 0022 SEND_VAL array(...) 1 0023 SEND_VAL int(0) 2 0024 SEND_VAL array(...) string("array") @@ -66,8 +66,8 @@ BB0: 0026 #6.V11 [array [long] of [long, string]] = DO_ICALL 0027 SEND_VAR #6.V11 [array [long] of [long, string]] 1 0028 DO_ICALL -0029 INIT_FCALL 1 96 string("print_r") -0030 INIT_FCALL 1 96 string("array_keys") +0029 INIT_FCALL 1 %d string("print_r") +0030 INIT_FCALL 1 %d string("array_keys") 0031 SEND_VAL array(...) 1 0032 SEND_VAL_EX int(0) string("test") 0033 SEND_VAL bool(true) string("strict") @@ -75,13 +75,13 @@ BB0: 0035 #7.V13 [array [long] of [long, string]] = DO_ICALL 0036 SEND_VAR #7.V13 [array [long] of [long, string]] 1 0037 DO_ICALL -0038 INIT_FCALL 2 112 string("mt_rand") +0038 INIT_FCALL 2 %d string("mt_rand") 0039 SEND_VAL int(0) 1 0040 SEND_VAL int(10) 2 0041 #8.V15 [long] = DO_ICALL 0042 ASSIGN #0.CV0($generated) [undef, ref, any] -> #9.CV0($generated) [ref, any] #8.V15 [long] -0043 INIT_FCALL 1 96 string("print_r") -0044 INIT_FCALL 3 128 string("array_keys") +0043 INIT_FCALL 1 %d string("print_r") +0044 INIT_FCALL 3 %d string("array_keys") 0045 #10.T17 [array [long] of [any]] = INIT_ARRAY 1 (packed) #9.CV0($generated) [ref, any] NEXT 0046 SEND_VAL #10.T17 [array [long] of [any]] 1 0047 SEND_VAR #9.CV0($generated) [ref, any] 2