Skip to content

Commit c6c0eb8

Browse files
committed
Optimize array_slice for packed arrays with large offsets
If the offset is 100000, and there are no gaps in the packed/unpacked array, then advance the pointer once by 100000, instead of looping and skipping 100000 times. Add a new test of array_slice handling unset offsets.
1 parent 8a88f72 commit c6c0eb8

File tree

2 files changed

+178
-37
lines changed

2 files changed

+178
-37
lines changed

ext/standard/array.c

Lines changed: 73 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3455,6 +3455,32 @@ PHP_FUNCTION(array_splice)
34553455
}
34563456
/* }}} */
34573457

3458+
/* {{{ find_bucket_at_offset(HashTable* ht, zend_long offset)
3459+
Finds the bucket at the given valid offset */
3460+
static inline Bucket* find_bucket_at_offset(HashTable* ht, zend_long offset)
3461+
{
3462+
zend_long pos;
3463+
Bucket *bucket;
3464+
ZEND_ASSERT(offset >= 0 && offset <= ht->nNumOfElements);
3465+
if (HT_IS_WITHOUT_HOLES(ht)) {
3466+
/** There's no need to iterate over the array to filter out holes if there are no holes */
3467+
/** This properly handles both packed and unpacked arrays. */
3468+
return ht->arData + offset;
3469+
}
3470+
/* Otherwise, this code has to iterate over the HashTable and skip holes in the array. */
3471+
pos = 0;
3472+
ZEND_HASH_FOREACH_BUCKET(ht, bucket) {
3473+
if (pos >= offset) {
3474+
/** This is the bucket of the array element at the requested offset */
3475+
return bucket;
3476+
}
3477+
++pos;
3478+
} ZEND_HASH_FOREACH_END();
3479+
3480+
/** Return a pointer to the end of the bucket array. */
3481+
return ht->arData + ht->nNumUsed;
3482+
}
3483+
34583484
/* {{{ proto array array_slice(array input, int offset [, int length [, bool preserve_keys]])
34593485
Returns elements specified by offset and length */
34603486
PHP_FUNCTION(array_slice)
@@ -3466,7 +3492,6 @@ PHP_FUNCTION(array_slice)
34663492
zend_bool length_is_null = 1; /* Whether an explicit length has been omitted */
34673493
zend_bool preserve_keys = 0; /* Whether to preserve keys while copying to the new array */
34683494
uint32_t num_in; /* Number of elements in the input array */
3469-
uint32_t pos; /* Current position in the array */
34703495
zend_string *string_key;
34713496
zend_ulong num_key;
34723497

@@ -3507,50 +3532,61 @@ PHP_FUNCTION(array_slice)
35073532
/* Initialize returned array */
35083533
array_init_size(return_value, (uint32_t)length);
35093534

3510-
/* Start at the beginning and go until we hit offset */
3511-
pos = 0;
3512-
if (HT_IS_PACKED(Z_ARRVAL_P(input)) &&
3513-
(!preserve_keys ||
3514-
(offset == 0 && HT_IS_WITHOUT_HOLES(Z_ARRVAL_P(input))))) {
3515-
zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
3516-
ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
3517-
ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(input), entry) {
3518-
pos++;
3519-
if (pos <= offset) {
3535+
// Contains modified variants of ZEND_HASH_FOREACH_VAL
3536+
{
3537+
HashTable *ht = Z_ARRVAL_P(input);
3538+
Bucket *p = find_bucket_at_offset(ht, offset);
3539+
Bucket *end = ht->arData + ht->nNumUsed;
3540+
3541+
/* Start at the beginning and go until we hit offset */
3542+
if (HT_IS_PACKED(Z_ARRVAL_P(input)) &&
3543+
(!preserve_keys ||
3544+
(offset == 0 && HT_IS_WITHOUT_HOLES(Z_ARRVAL_P(input))))) {
3545+
3546+
zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
3547+
ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
3548+
for (; p != end; p++) {
3549+
if (__fill_idx >= length) {
3550+
break;
3551+
}
3552+
entry = &p->val;
3553+
if (UNEXPECTED(Z_TYPE_P(entry) == IS_UNDEF)) {
3554+
continue;
3555+
}
3556+
if (UNEXPECTED(Z_ISREF_P(entry)) &&
3557+
UNEXPECTED(Z_REFCOUNT_P(entry) == 1)) {
3558+
entry = Z_REFVAL_P(entry);
3559+
}
3560+
Z_TRY_ADDREF_P(entry);
3561+
ZEND_HASH_FILL_ADD(entry);
3562+
}
3563+
} ZEND_HASH_FILL_END();
3564+
} else {
3565+
zend_long n = 0; /* Current number of elements */
3566+
for (; p != end; p++) {
3567+
entry = &p->val;
3568+
if (UNEXPECTED(Z_TYPE_P(entry) == IS_UNDEF)) {
35203569
continue;
35213570
}
3522-
if (pos > offset + length) {
3571+
if (n >= length) {
35233572
break;
35243573
}
3525-
if (UNEXPECTED(Z_ISREF_P(entry)) &&
3526-
UNEXPECTED(Z_REFCOUNT_P(entry) == 1)) {
3527-
entry = Z_REFVAL_P(entry);
3528-
}
3529-
Z_TRY_ADDREF_P(entry);
3530-
ZEND_HASH_FILL_ADD(entry);
3531-
} ZEND_HASH_FOREACH_END();
3532-
} ZEND_HASH_FILL_END();
3533-
} else {
3534-
ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
3535-
pos++;
3536-
if (pos <= offset) {
3537-
continue;
3538-
}
3539-
if (pos > offset + length) {
3540-
break;
3541-
}
3574+
n++;
3575+
num_key = p->h;
3576+
string_key = p->key;
35423577

3543-
if (string_key) {
3544-
entry = zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, entry);
3545-
} else {
3546-
if (preserve_keys) {
3547-
entry = zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, entry);
3578+
if (string_key) {
3579+
entry = zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, entry);
35483580
} else {
3549-
entry = zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), entry);
3581+
if (preserve_keys) {
3582+
entry = zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, entry);
3583+
} else {
3584+
entry = zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), entry);
3585+
}
35503586
}
3587+
zval_add_ref(entry);
35513588
}
3552-
zval_add_ref(entry);
3553-
} ZEND_HASH_FOREACH_END();
3589+
}
35543590
}
35553591
}
35563592
/* }}} */
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
--TEST--
2+
Test array_slice() function : usage variations - array has holes in buckets
3+
--FILE--
4+
<?php
5+
/* Prototype : array array_slice(array $input, int $offset [, int $length [, bool $preserve_keys]])
6+
* Description: Returns elements specified by offset and length
7+
* Source code: ext/standard/array.c
8+
*/
9+
10+
/*
11+
* Check that results of array_slice are correct when there are holes in buckets caused by unset()
12+
*/
13+
14+
echo "*** Testing array_slice() : usage variations ***\n";
15+
16+
function dump_slice(array $input, $offsetToUnset, int $offset, int $length) {
17+
unset($input[$offsetToUnset]);
18+
var_dump(array_slice($input, $offset, $length));
19+
}
20+
21+
echo "\n-- Call array_slice() on array with string keys--\n";
22+
$input = ['one' => 'un', 'two' => 'deux', 23 => 'twenty-three', 'zero'];
23+
dump_slice($input, 'two', 0, 1);
24+
dump_slice($input, 'two', 0, 2);
25+
dump_slice($input, 'two', 0, 3);
26+
dump_slice($input, 23, 1, 2);
27+
28+
echo "\n-- Call array_slice() on array with packed keys--\n";
29+
$input = [10, 11, 12, 'thirteen'];
30+
dump_slice($input, 0, 0, 1);
31+
dump_slice($input, 1, 0, 1);
32+
dump_slice($input, 1, 0, 3);
33+
dump_slice($input, 1, -1, 1);
34+
dump_slice($input, 1, 0, 3);
35+
dump_slice($input, 1, -3, 3);
36+
37+
echo "Done";
38+
?>
39+
--EXPECT--
40+
*** Testing array_slice() : usage variations ***
41+
42+
-- Call array_slice() on array with string keys--
43+
array(1) {
44+
["one"]=>
45+
string(2) "un"
46+
}
47+
array(2) {
48+
["one"]=>
49+
string(2) "un"
50+
[0]=>
51+
string(12) "twenty-three"
52+
}
53+
array(3) {
54+
["one"]=>
55+
string(2) "un"
56+
[0]=>
57+
string(12) "twenty-three"
58+
[1]=>
59+
string(4) "zero"
60+
}
61+
array(2) {
62+
["two"]=>
63+
string(4) "deux"
64+
[0]=>
65+
string(4) "zero"
66+
}
67+
68+
-- Call array_slice() on array with packed keys--
69+
array(1) {
70+
[0]=>
71+
int(11)
72+
}
73+
array(1) {
74+
[0]=>
75+
int(10)
76+
}
77+
array(3) {
78+
[0]=>
79+
int(10)
80+
[1]=>
81+
int(12)
82+
[2]=>
83+
string(8) "thirteen"
84+
}
85+
array(1) {
86+
[0]=>
87+
string(8) "thirteen"
88+
}
89+
array(3) {
90+
[0]=>
91+
int(10)
92+
[1]=>
93+
int(12)
94+
[2]=>
95+
string(8) "thirteen"
96+
}
97+
array(3) {
98+
[0]=>
99+
int(10)
100+
[1]=>
101+
int(12)
102+
[2]=>
103+
string(8) "thirteen"
104+
}
105+
Done

0 commit comments

Comments
 (0)