Skip to content

In place array modification #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 121 additions & 22 deletions ext/standard/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -3825,6 +3825,56 @@ PHPAPI int php_array_replace_recursive(HashTable *dest, HashTable *src) /* {{{ *
}
/* }}} */

/* Returns true if it's possible to do an in-place array modification, preventing a costly copy.
* It also modifies the CV to prevent freeing it upon assigning.
* If this returns true you need to add a ref at the end of the modification for the return value. */
static bool prepare_in_place_array_modify_if_possible(const zend_execute_data *execute_data, const zval *arg)
{
/* 2 refs: the CV and the argument; or 1 ref for a temporary passed as argument */
uint32_t refcount = Z_REFCOUNT_P(arg);
ZEND_ASSERT(refcount > 0);
if (refcount > 2) {
return false;
}
/* Immutable or persistent => no modification allowed */
if (GC_FLAGS(Z_ARRVAL_P(arg)) & (IS_ARRAY_IMMUTABLE | IS_ARRAY_PERSISTENT)) {
return false;
}

if (refcount == 2) {
const zend_op *call_opline = execute_data->prev_execute_data->opline;
const zend_op *next_opline = call_opline + 1;

/* Must be an assignment from the result of the call to a CV */
if (next_opline->opcode != ZEND_ASSIGN || next_opline->op1_type != IS_CV || next_opline->op2.var != call_opline->result.var) {
return false;
}

/* Must be an assignment to the same array as the input */
zval *var = ZEND_CALL_VAR(execute_data->prev_execute_data, next_opline->op1.var);
if (Z_TYPE_P(var) != IS_ARRAY || Z_ARRVAL_P(arg) != Z_ARRVAL_P(var)) {
return false;
}
/* Must set the CV to NULL so we don't destroy the array on assignment */
ZVAL_NULL(var);
/* Make RC 1 such that the array may be modified */
GC_DELREF(Z_ARRVAL_P(arg));
}

return true;
}

/* Stores the array to work on in `return_value`.
 *
 * When the argument array may be modified in place, `return_value` is pointed at
 * that very array and true is returned. Otherwise `return_value` receives a
 * duplicate of the argument array and false is returned. */
static bool set_return_value_array_dup_or_in_place(const zend_execute_data *execute_data, const zval *arg, zval *return_value)
{
	const bool in_place = prepare_in_place_array_modify_if_possible(execute_data, arg);

	if (!in_place) {
		/* In-place modification is not possible: work on a private copy */
		RETVAL_ARR(zend_array_dup(Z_ARRVAL_P(arg)));
		return false;
	}

	/* Reuse the argument's array directly as the result */
	RETVAL_ARR(Z_ARRVAL_P(arg));
	return true;
}

static zend_always_inline void php_array_replace_wrapper(INTERNAL_FUNCTION_PARAMETERS, int recursive) /* {{{ */
{
zval *args = NULL;
Expand All @@ -3846,10 +3896,11 @@ static zend_always_inline void php_array_replace_wrapper(INTERNAL_FUNCTION_PARAM
}
}

/* copy first array */
/* copy first array if necessary */
arg = args;
dest = zend_array_dup(Z_ARRVAL_P(arg));
ZVAL_ARR(return_value, dest);
bool update_refcount = set_return_value_array_dup_or_in_place(execute_data, arg, return_value);
dest = Z_ARRVAL_P(return_value);

if (recursive) {
for (i = 1; i < argc; i++) {
arg = args + i;
Expand All @@ -3861,6 +3912,10 @@ static zend_always_inline void php_array_replace_wrapper(INTERNAL_FUNCTION_PARAM
zend_hash_merge(dest, Z_ARRVAL_P(arg), zval_add_ref, 1);
}
}

if (update_refcount) {
GC_ADDREF(dest);
}
}
/* }}} */

Expand Down Expand Up @@ -3925,22 +3980,34 @@ static zend_always_inline void php_array_merge_wrapper(INTERNAL_FUNCTION_PARAMET

arg = args;
src = Z_ARRVAL_P(arg);
/* copy first array */
array_init_size(return_value, count);
dest = Z_ARRVAL_P(return_value);
bool update_refcount = false;
/* copy first array if necessary */
if (HT_IS_PACKED(src)) {
zend_hash_real_init_packed(dest);
ZEND_HASH_FILL_PACKED(dest) {
ZEND_HASH_PACKED_FOREACH_VAL(src, src_entry) {
if (UNEXPECTED(Z_ISREF_P(src_entry) &&
Z_REFCOUNT_P(src_entry) == 1)) {
src_entry = Z_REFVAL_P(src_entry);
}
Z_TRY_ADDREF_P(src_entry);
ZEND_HASH_FILL_ADD(src_entry);
} ZEND_HASH_FOREACH_END();
} ZEND_HASH_FILL_END();
/* Note: If it has holes, it might get sequentialized */
if (HT_IS_WITHOUT_HOLES(src) && prepare_in_place_array_modify_if_possible(execute_data, arg)) {
update_refcount = true;
dest = src;
ZVAL_ARR(return_value, dest);
} else {
array_init_size(return_value, count);
dest = Z_ARRVAL_P(return_value);

zend_hash_real_init_packed(dest);
ZEND_HASH_FILL_PACKED(dest) {
ZEND_HASH_PACKED_FOREACH_VAL(src, src_entry) {
if (UNEXPECTED(Z_ISREF_P(src_entry) &&
Z_REFCOUNT_P(src_entry) == 1)) {
src_entry = Z_REFVAL_P(src_entry);
}
Z_TRY_ADDREF_P(src_entry);
ZEND_HASH_FILL_ADD(src_entry);
} ZEND_HASH_FOREACH_END();
} ZEND_HASH_FILL_END();
}
} else {
array_init_size(return_value, count);
dest = Z_ARRVAL_P(return_value);

zend_string *string_key;
zend_hash_real_init_mixed(dest);
ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(src, string_key, src_entry) {
Expand All @@ -3967,6 +4034,10 @@ static zend_always_inline void php_array_merge_wrapper(INTERNAL_FUNCTION_PARAMET
php_array_merge(dest, Z_ARRVAL_P(arg));
}
}

if (update_refcount) {
GC_ADDREF(src);
}
}
/* }}} */

Expand Down Expand Up @@ -4574,7 +4645,7 @@ PHP_FUNCTION(array_unique)

cmp = php_get_data_compare_func_unstable(sort_type, 0);

RETVAL_ARR(zend_array_dup(Z_ARRVAL_P(array)));
bool update_refcount = set_return_value_array_dup_or_in_place(execute_data, array, return_value);

/* create and sort array with pointers to the target_hash buckets */
arTmp = pemalloc((Z_ARRVAL_P(array)->nNumOfElements + 1) * sizeof(struct bucketindex), GC_FLAGS(Z_ARRVAL_P(array)) & IS_ARRAY_PERSISTENT);
Expand Down Expand Up @@ -4620,6 +4691,10 @@ PHP_FUNCTION(array_unique)
}
}
pefree(arTmp, GC_FLAGS(Z_ARRVAL_P(array)) & IS_ARRAY_PERSISTENT);

if (update_refcount) {
GC_ADDREF(Z_ARRVAL_P(array));
}
}
/* }}} */

Expand Down Expand Up @@ -4744,6 +4819,7 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int
zend_fcall_info *fci_key = NULL, *fci_data;
zend_fcall_info_cache *fci_key_cache = NULL, *fci_data_cache;
PHP_ARRAY_CMP_FUNC_VARS;
bool in_place = false;

bucket_compare_func_t intersect_key_compare_func;
bucket_compare_func_t intersect_data_compare_func;
Expand Down Expand Up @@ -4828,12 +4904,15 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int
} else if ((behavior & INTERSECT_ASSOC) && key_compare_type == INTERSECT_COMP_KEY_USER) {
BG(user_compare_fci) = *fci_key;
BG(user_compare_fci_cache) = *fci_key_cache;
} else {
in_place = true;
}

for (i = 0; i < arr_argc; i++) {
if (Z_TYPE(args[i]) != IS_ARRAY) {
zend_argument_type_error(i + 1, "must be of type array, %s given", zend_zval_value_name(&args[i]));
arr_argc = i; /* only free up to i - 1 */
in_place = false;
goto out;
}
hash = Z_ARRVAL(args[i]);
Expand Down Expand Up @@ -4870,8 +4949,12 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int
}
}

/* copy the argument array */
RETVAL_ARR(zend_array_dup(Z_ARRVAL(args[0])));
/* copy the argument array if necessary */
if (in_place) {
in_place = set_return_value_array_dup_or_in_place(execute_data, &args[0], return_value);
} else {
RETVAL_ARR(zend_array_dup(Z_ARRVAL(args[0])));
}

/* go through the lists and look for common values */
while (Z_TYPE(ptrs[0]->val) != IS_UNDEF) {
Expand Down Expand Up @@ -4982,6 +5065,10 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int

efree(ptrs);
efree(lists);

if (in_place) {
GC_ADDREF(Z_ARRVAL_P(return_value));
}
}
/* }}} */

Expand Down Expand Up @@ -5129,6 +5216,7 @@ static void php_array_diff(INTERNAL_FUNCTION_PARAMETERS, int behavior, int data_
zend_fcall_info *fci_key = NULL, *fci_data;
zend_fcall_info_cache *fci_key_cache = NULL, *fci_data_cache;
PHP_ARRAY_CMP_FUNC_VARS;
bool in_place = false;

bucket_compare_func_t diff_key_compare_func;
bucket_compare_func_t diff_data_compare_func;
Expand Down Expand Up @@ -5213,12 +5301,15 @@ static void php_array_diff(INTERNAL_FUNCTION_PARAMETERS, int behavior, int data_
} else if ((behavior & DIFF_ASSOC) && key_compare_type == DIFF_COMP_KEY_USER) {
BG(user_compare_fci) = *fci_key;
BG(user_compare_fci_cache) = *fci_key_cache;
} else {
in_place = true;
}

for (i = 0; i < arr_argc; i++) {
if (Z_TYPE(args[i]) != IS_ARRAY) {
zend_argument_type_error(i + 1, "must be of type array, %s given", zend_zval_value_name(&args[i]));
arr_argc = i; /* only free up to i - 1 */
in_place = false;
goto out;
}
hash = Z_ARRVAL(args[i]);
Expand Down Expand Up @@ -5255,8 +5346,12 @@ static void php_array_diff(INTERNAL_FUNCTION_PARAMETERS, int behavior, int data_
}
}

/* copy the argument array */
RETVAL_ARR(zend_array_dup(Z_ARRVAL(args[0])));
/* copy the argument array if necessary */
if (in_place) {
in_place = set_return_value_array_dup_or_in_place(execute_data, &args[0], return_value);
} else {
RETVAL_ARR(zend_array_dup(Z_ARRVAL(args[0])));
}

/* go through the lists and look for values of ptr[0] that are not in the others */
while (Z_TYPE(ptrs[0]->val) != IS_UNDEF) {
Expand Down Expand Up @@ -5365,6 +5460,10 @@ static void php_array_diff(INTERNAL_FUNCTION_PARAMETERS, int behavior, int data_

efree(ptrs);
efree(lists);

if (in_place) {
GC_ADDREF(Z_ARRVAL_P(return_value));
}
}
/* }}} */

Expand Down