Skip to content

Commit 947eb95

Browse files
authored
Allow array functions to operate in-place if the refcount is 1 (#11060)
This allows array_merge, array_intersect, array_replace, array_unique and usort to avoid taking a copy and do the transformation in-place. ** Safety ** There are some array functions which take a copy of the input array into a temporary C array for sorting purposes. (e.g. array_unique, array_diff, and array_intersect do this). Since we no longer take a copy in all cases, we must check if it's possible that a value is accessed that was already destroyed. For array_unique: cmpdata will never be removed so that will never reach refcount 0. And when something is removed, it is the previous value of cmpdata, not the one user later. So this seems okay. For array_intersect: a previous pointer (ptr[0] - 1) is accessed. But this can't be a destroyed value because the pointer is first moved forward. For array_diff: it's possible a previous pointer is accessed after destruction. So we can't optimise this case easily.
1 parent 1209f59 commit 947eb95

File tree

2 files changed

+80
-21
lines changed

2 files changed

+80
-21
lines changed

Zend/zend_types.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,4 +1507,9 @@ static zend_always_inline uint32_t zval_delref_p(zval* pz) {
15071507
do { ZVAL_COPY_OR_DUP(z, v); Z_PROP_FLAG_P(z) = Z_PROP_FLAG_P(v); } while (0)
15081508

15091509

1510+
static zend_always_inline bool zend_may_modify_arg_in_place(const zval *arg)
1511+
{
1512+
return Z_REFCOUNTED_P(arg) && !(GC_FLAGS(Z_COUNTED_P(arg)) & (GC_IMMUTABLE | GC_PERSISTENT)) && Z_REFCOUNT_P(arg) == 1;
1513+
}
1514+
15101515
#endif /* ZEND_TYPES_H */

ext/standard/array.c

Lines changed: 75 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -901,11 +901,19 @@ static void php_usort(INTERNAL_FUNCTION_PARAMETERS, bucket_compare_func_t compar
901901
RETURN_TRUE;
902902
}
903903

904-
/* Copy array, so the in-place modifications will not be visible to the callback function */
905-
arr = zend_array_dup(arr);
904+
/* Copy array, so the in-place modifications will not be visible to the callback function.
905+
* Unless there are no other references since we know for sure it won't be visible. */
906+
bool in_place = zend_may_modify_arg_in_place(array);
907+
if (!in_place) {
908+
arr = zend_array_dup(arr);
909+
}
906910

907911
zend_hash_sort(arr, compare_func, renumber);
908912

913+
if (in_place) {
914+
GC_ADDREF(arr);
915+
}
916+
909917
zval garbage;
910918
ZVAL_COPY_VALUE(&garbage, array);
911919
ZVAL_ARR(array, arr);
@@ -3866,10 +3874,17 @@ static zend_always_inline void php_array_replace_wrapper(INTERNAL_FUNCTION_PARAM
38663874
}
38673875
}
38683876

3869-
/* copy first array */
3877+
/* copy first array if necessary */
38703878
arg = args;
3871-
dest = zend_array_dup(Z_ARRVAL_P(arg));
3879+
bool in_place = zend_may_modify_arg_in_place(arg);
3880+
if (in_place) {
3881+
dest = Z_ARRVAL_P(arg);
3882+
} else {
3883+
dest = zend_array_dup(Z_ARRVAL_P(arg));
3884+
}
3885+
38723886
ZVAL_ARR(return_value, dest);
3887+
38733888
if (recursive) {
38743889
for (i = 1; i < argc; i++) {
38753890
arg = args + i;
@@ -3881,6 +3896,10 @@ static zend_always_inline void php_array_replace_wrapper(INTERNAL_FUNCTION_PARAM
38813896
zend_hash_merge(dest, Z_ARRVAL_P(arg), zval_add_ref, 1);
38823897
}
38833898
}
3899+
3900+
if (in_place) {
3901+
GC_ADDREF(dest);
3902+
}
38843903
}
38853904
/* }}} */
38863905

@@ -3945,22 +3964,34 @@ static zend_always_inline void php_array_merge_wrapper(INTERNAL_FUNCTION_PARAMET
39453964

39463965
arg = args;
39473966
src = Z_ARRVAL_P(arg);
3948-
/* copy first array */
3949-
array_init_size(return_value, count);
3950-
dest = Z_ARRVAL_P(return_value);
3967+
/* copy first array if necessary */
3968+
bool in_place = false;
39513969
if (HT_IS_PACKED(src)) {
3952-
zend_hash_real_init_packed(dest);
3953-
ZEND_HASH_FILL_PACKED(dest) {
3954-
ZEND_HASH_PACKED_FOREACH_VAL(src, src_entry) {
3955-
if (UNEXPECTED(Z_ISREF_P(src_entry) &&
3956-
Z_REFCOUNT_P(src_entry) == 1)) {
3957-
src_entry = Z_REFVAL_P(src_entry);
3958-
}
3959-
Z_TRY_ADDREF_P(src_entry);
3960-
ZEND_HASH_FILL_ADD(src_entry);
3961-
} ZEND_HASH_FOREACH_END();
3962-
} ZEND_HASH_FILL_END();
3970+
/* Note: If it has holes, it might get sequentialized */
3971+
if (HT_IS_WITHOUT_HOLES(src) && zend_may_modify_arg_in_place(arg)) {
3972+
dest = src;
3973+
in_place = true;
3974+
ZVAL_ARR(return_value, dest);
3975+
} else {
3976+
array_init_size(return_value, count);
3977+
dest = Z_ARRVAL_P(return_value);
3978+
3979+
zend_hash_real_init_packed(dest);
3980+
ZEND_HASH_FILL_PACKED(dest) {
3981+
ZEND_HASH_PACKED_FOREACH_VAL(src, src_entry) {
3982+
if (UNEXPECTED(Z_ISREF_P(src_entry) &&
3983+
Z_REFCOUNT_P(src_entry) == 1)) {
3984+
src_entry = Z_REFVAL_P(src_entry);
3985+
}
3986+
Z_TRY_ADDREF_P(src_entry);
3987+
ZEND_HASH_FILL_ADD(src_entry);
3988+
} ZEND_HASH_FOREACH_END();
3989+
} ZEND_HASH_FILL_END();
3990+
}
39633991
} else {
3992+
array_init_size(return_value, count);
3993+
dest = Z_ARRVAL_P(return_value);
3994+
39643995
zend_string *string_key;
39653996
zend_hash_real_init_mixed(dest);
39663997
ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(src, string_key, src_entry) {
@@ -3987,6 +4018,10 @@ static zend_always_inline void php_array_merge_wrapper(INTERNAL_FUNCTION_PARAMET
39874018
php_array_merge(dest, Z_ARRVAL_P(arg));
39884019
}
39894020
}
4021+
4022+
if (in_place) {
4023+
GC_ADDREF(dest);
4024+
}
39904025
}
39914026
/* }}} */
39924027

@@ -4594,7 +4629,12 @@ PHP_FUNCTION(array_unique)
45944629

45954630
cmp = php_get_data_compare_func_unstable(sort_type, 0);
45964631

4597-
RETVAL_ARR(zend_array_dup(Z_ARRVAL_P(array)));
4632+
bool in_place = zend_may_modify_arg_in_place(array);
4633+
if (in_place) {
4634+
RETVAL_ARR(Z_ARRVAL_P(array));
4635+
} else {
4636+
RETVAL_ARR(zend_array_dup(Z_ARRVAL_P(array)));
4637+
}
45984638

45994639
/* create and sort array with pointers to the target_hash buckets */
46004640
arTmp = pemalloc((Z_ARRVAL_P(array)->nNumOfElements + 1) * sizeof(struct bucketindex), GC_FLAGS(Z_ARRVAL_P(array)) & IS_ARRAY_PERSISTENT);
@@ -4640,6 +4680,10 @@ PHP_FUNCTION(array_unique)
46404680
}
46414681
}
46424682
pefree(arTmp, GC_FLAGS(Z_ARRVAL_P(array)) & IS_ARRAY_PERSISTENT);
4683+
4684+
if (in_place) {
4685+
Z_ADDREF_P(return_value);
4686+
}
46434687
}
46444688
/* }}} */
46454689

@@ -4764,6 +4808,7 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int
47644808
zend_fcall_info *fci_key = NULL, *fci_data;
47654809
zend_fcall_info_cache *fci_key_cache = NULL, *fci_data_cache;
47664810
PHP_ARRAY_CMP_FUNC_VARS;
4811+
bool in_place = false;
47674812

47684813
bucket_compare_func_t intersect_key_compare_func;
47694814
bucket_compare_func_t intersect_data_compare_func;
@@ -4890,8 +4935,13 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int
48904935
}
48914936
}
48924937

4893-
/* copy the argument array */
4894-
RETVAL_ARR(zend_array_dup(Z_ARRVAL(args[0])));
4938+
/* copy the argument array if necessary */
4939+
in_place = zend_may_modify_arg_in_place(&args[0]);
4940+
if (in_place) {
4941+
RETVAL_ARR(Z_ARRVAL_P(&args[0]));
4942+
} else {
4943+
RETVAL_ARR(zend_array_dup(Z_ARRVAL_P(&args[0])));
4944+
}
48954945

48964946
/* go through the lists and look for common values */
48974947
while (Z_TYPE(ptrs[0]->val) != IS_UNDEF) {
@@ -5002,6 +5052,10 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int
50025052

50035053
efree(ptrs);
50045054
efree(lists);
5055+
5056+
if (in_place) {
5057+
Z_ADDREF_P(return_value);
5058+
}
50055059
}
50065060
/* }}} */
50075061

0 commit comments

Comments
 (0)