Skip to content

Commit 366b22a

Browse files
Merge branch 'php:master' into master
2 parents 6567de0 + 1d42ea0 commit 366b22a

File tree

12 files changed

+304
-52
lines changed

12 files changed

+304
-52
lines changed

UPGRADING.INTERNALS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ PHP 8.3 INTERNALS UPGRADE NOTES
6060
length at each call. The key_suffix parameter was dropped as it was a
6161
constant value and depended on the key_prefix parameter to not be NULL.
6262

63+
c. ext/mysqlnd
64+
- The function mysqlnd_shutdown and its corresponding internal methods
65+
mysqlnd_command::shutdown & mysqlnd_conn_data::shutdown have been removed.
66+
These functions are deprecated by MySQL in favour of the SHUTDOWN SQL statement.
67+
6368
========================
6469
4. OpCode changes
6570
========================

Zend/tests/gh10346.phpt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
--TEST--
2+
GH-10346 (Observer: enum tryFrom() run_time_cache properly assigned)
3+
--CREDITS--
4+
Florian Sowade
5+
--EXTENSIONS--
6+
zend_test
7+
--INI--
8+
zend_test.observer.enabled=1
9+
zend_test.observer.observe_all=1
10+
--FILE--
11+
<?php
12+
enum Card : string
13+
{
14+
case HEART = 'H';
15+
}
16+
17+
var_dump(Card::tryFrom('H'));
18+
?>
19+
--EXPECTF--
20+
<!-- init '%s' -->
21+
<file '%s'>
22+
<!-- init Card::tryFrom() -->
23+
<Card::tryFrom>
24+
</Card::tryFrom>
25+
<!-- init var_dump() -->
26+
<var_dump>
27+
enum(Card::HEART)
28+
</var_dump>
29+
</file '%s'>

ext/hash/hash_xxhash.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,9 @@ zend_always_inline static void _PHP_XXH3_Init(PHP_XXH3_64_CTX *ctx, HashTable *a
174174
func_init_seed(&ctx->s, (XXH64_hash_t)Z_LVAL_P(_seed));
175175
return;
176176
} else if (_secret) {
177-
convert_to_string(_secret);
177+
if (!try_convert_to_string(_secret)) {
178+
return;
179+
}
178180
size_t len = Z_STRLEN_P(_secret);
179181
if (len < PHP_XXH3_SECRET_SIZE_MIN) {
180182
zend_throw_error(NULL, "%s: Secret length must be >= %u bytes, %zu bytes passed", algo_name, XXH3_SECRET_SIZE_MIN, len);

ext/hash/tests/xxhash_secret.phpt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@ Hash: xxHash secret
33
--FILE--
44
<?php
55

6+
class StringableThrowingClass {
7+
public function __toString(): string {
8+
throw new Exception('exception in __toString');
9+
return '';
10+
}
11+
}
12+
613
foreach (["xxh3", "xxh128"] as $a) {
714

815
//$secret = random_bytes(256);
@@ -14,6 +21,12 @@ foreach (["xxh3", "xxh128"] as $a) {
1421
var_dump($e->getMessage());
1522
}
1623

24+
try {
25+
$ctx = hash_init($a, options: ["secret" => new StringableThrowingClass()]);
26+
} catch (Throwable $e) {
27+
var_dump($e->getMessage());
28+
}
29+
1730
try {
1831
$ctx = hash_init($a, options: ["secret" => str_repeat('a', 17)]);
1932
} catch (Throwable $e) {
@@ -35,8 +48,10 @@ foreach (["xxh3", "xxh128"] as $a) {
3548
?>
3649
--EXPECT--
3750
string(67) "xxh3: Only one of seed or secret is to be passed for initialization"
51+
string(23) "exception in __toString"
3852
string(57) "xxh3: Secret length must be >= 136 bytes, 17 bytes passed"
3953
8028aa834c03557a == 8028aa834c03557a == true
4054
string(69) "xxh128: Only one of seed or secret is to be passed for initialization"
55+
string(23) "exception in __toString"
4156
string(59) "xxh128: Secret length must be >= 136 bytes, 17 bytes passed"
4257
54279097795e7218093a05d4d781cbb9 == 54279097795e7218093a05d4d781cbb9 == true

ext/mbstring/mbstring.c

Lines changed: 209 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,9 +1748,11 @@ static size_t mb_fast_strlen_utf8(unsigned char *p, size_t len)
17481748

17491749
#ifdef __SSE2__
17501750
if (len >= sizeof(__m128i)) {
1751+
e -= sizeof(__m128i);
1752+
17511753
const __m128i threshold = _mm_set1_epi8(-64);
17521754
const __m128i delta = _mm_set1_epi8(1);
1753-
__m128i counter = _mm_set1_epi8(0); /* Vector of 16 continuation-byte counters */
1755+
__m128i counter = _mm_setzero_si128(); /* Vector of 16 continuation-byte counters */
17541756

17551757
int reset_counter = 255;
17561758
do {
@@ -1762,13 +1764,14 @@ static size_t mb_fast_strlen_utf8(unsigned char *p, size_t len)
17621764
* and reset them to zero */
17631765
if (--reset_counter == 0) {
17641766
len -= _mm_sum_epu8(counter);
1765-
counter = _mm_set1_epi8(0);
1767+
counter = _mm_setzero_si128();
17661768
reset_counter = 255;
17671769
}
17681770

17691771
p += sizeof(__m128i);
1770-
} while (p + sizeof(__m128i) <= e);
1772+
} while (p <= e);
17711773

1774+
e += sizeof(__m128i);
17721775
len -= _mm_sum_epu8(counter); /* Fold in any remaining non-zero values in the 16 counters */
17731776
}
17741777
#endif
@@ -4587,13 +4590,215 @@ MBSTRING_API bool php_mb_check_encoding(const char *input, size_t length, const
45874590
return true;
45884591
}
45894592

4593+
static bool mb_fast_check_utf8(zend_string *str)
4594+
{
4595+
#ifdef __SSE2__
4596+
unsigned char *p = (unsigned char*)ZSTR_VAL(str);
4597+
/* `e` points 1 byte past the last full 16-byte block of string content
4598+
* Note that we include the terminating null byte which is included in each zend_string
4599+
* as part of the content to check; this ensures that multi-byte characters which are
4600+
* truncated abruptly at the end of the string will be detected as invalid */
4601+
unsigned char *e = p + ((ZSTR_LEN(str) + 1) & ~(sizeof(__m128i) - 1));
4602+
4603+
/* For checking for illegal bytes 0xF5-FF */
4604+
const __m128i over_f5 = _mm_set1_epi8(-117);
4605+
/* For checking for overlong 3-byte code units and reserved codepoints U+D800-DFFF */
4606+
const __m128i over_9f = _mm_set1_epi8(-97);
4607+
/* For checking for overlong 4-byte code units and invalid codepoints > U+10FFFF */
4608+
const __m128i over_8f = _mm_set1_epi8(-113);
4609+
/* For checking for illegal bytes 0xC0-C1 */
4610+
const __m128i find_c0 = _mm_set1_epi8(-64);
4611+
const __m128i c0_to_c1 = _mm_set1_epi8(-126);
4612+
/* For checking structure of continuation bytes */
4613+
const __m128i find_e0 = _mm_set1_epi8(-32);
4614+
const __m128i find_f0 = _mm_set1_epi8(-16);
4615+
4616+
__m128i last_block = _mm_setzero_si128();
4617+
__m128i operand;
4618+
4619+
while (p < e) {
4620+
operand = _mm_loadu_si128((__m128i*)p); /* Load 16 bytes */
4621+
4622+
check_operand:
4623+
/* If all 16 bytes are single-byte characters, then a number of checks can be skipped */
4624+
if (!_mm_movemask_epi8(_mm_cmplt_epi8(operand, _mm_setzero_si128()))) {
4625+
/* Even if this block only contains single-byte characters, there may have been a
4626+
* multi-byte character at the end of the previous block, which was supposed to
4627+
* have continuation bytes in this block
4628+
* This bitmask will pick out a 2/3/4-byte character starting from the last byte of
4629+
* the previous block, a 3/4-byte starting from the 2nd last, or a 4-byte starting
4630+
* from the 3rd last */
4631+
__m128i bad_mask = _mm_set_epi8(-64, -32, -16, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
4632+
__m128i bad = _mm_cmpeq_epi8(_mm_and_si128(last_block, bad_mask), bad_mask);
4633+
if (_mm_movemask_epi8(bad)) {
4634+
return false;
4635+
}
4636+
4637+
/* Consume as many full blocks of single-byte characters as we can */
4638+
while (true) {
4639+
p += sizeof(__m128i);
4640+
if (p >= e) {
4641+
goto finish_up_remaining_bytes;
4642+
}
4643+
operand = _mm_loadu_si128((__m128i*)p);
4644+
if (_mm_movemask_epi8(_mm_cmplt_epi8(operand, _mm_setzero_si128()))) {
4645+
break;
4646+
}
4647+
}
4648+
}
4649+
4650+
/* Check for >= 0xF5, which are illegal byte values in UTF-8
4651+
* AVX512 has instructions for vectorized unsigned compare, but SSE2 only has signed compare
4652+
* So we add an offset to shift 0xF5-FF to the far low end of the signed byte range
4653+
* Then a single signed compare will pick out any bad bytes
4654+
* `bad` is a vector of 16 good/bad values, where 0x00 means good and 0xFF means bad */
4655+
__m128i bad = _mm_cmplt_epi8(_mm_add_epi8(operand, over_f5), over_f5);
4656+
4657+
/* Check for overlong 3-byte code units AND reserved codepoints U+D800-DFFF
4658+
* 0xE0 followed by a byte < 0xA0 indicates an overlong 3-byte code unit, and
4659+
* 0xED followed by a byte >= 0xA0 indicates a reserved codepoint
4660+
* We can check for both problems at once by generating a vector where each byte < 0xA0
4661+
* is mapped to 0xE0, and each byte >= 0xA0 is mapped to 0xED
4662+
* Shift the original block right by one byte (in memory order), and XOR the shifted block with the bitmask
4663+
* Any matches will give a 0x00 byte; do a compare with a zero vector to pick out the
4664+
* bad positions, and OR them into `bad` */
4665+
__m128i operand2 = _mm_or_si128(_mm_slli_si128(operand, 1), _mm_srli_si128(last_block, 15));
4666+
__m128i mask1 = _mm_or_si128(find_e0, _mm_and_si128(_mm_set1_epi8(0xD), _mm_cmpgt_epi8(operand, over_9f)));
4667+
bad = _mm_or_si128(bad, _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_xor_si128(operand2, mask1)));
4668+
4669+
/* Check for overlong 4-byte code units AND invalid codepoints > U+10FFFF
4670+
* Similar to the previous check; 0xF0 followed by < 0x90 indicates an overlong 4-byte
4671+
* code unit, and 0xF4 followed by >= 0x90 indicates a codepoint over U+10FFFF
4672+
* Build the bitmask, XOR it with the shifted block, check for 0x00 bytes in the result */
4673+
__m128i mask2 = _mm_or_si128(find_f0, _mm_and_si128(_mm_set1_epi8(0x4), _mm_cmpgt_epi8(operand, over_8f)));
4674+
bad = _mm_or_si128(bad, _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_xor_si128(operand2, mask2)));
4675+
4676+
/* Check for overlong 2-byte code units
4677+
* Any 0xC0 or 0xC1 byte can only be the first byte of an overlong 2-byte code unit
4678+
* Same deal as before; add an offset to shift 0xC0-C1 to the far low end of the signed
4679+
* byte range, do a signed compare to pick out any bad bytes */
4680+
bad = _mm_or_si128(bad, _mm_cmplt_epi8(_mm_add_epi8(operand, find_c0), c0_to_c1));
4681+
4682+
/* Check structure of continuation bytes
4683+
* A UTF-8 byte should be a continuation byte if, and only if, it is:
4684+
* 1) 1 byte after the start of a 2-byte, 3-byte, or 4-byte character
4685+
* 2) 2 bytes after the start of a 3-byte or 4-byte character
4686+
* 3) 3 bytes after the start of a 4-byte character
4687+
* We build 3 bitmasks with 0xFF in each such position, and OR them together to
4688+
* get a single bitmask with 0xFF in each position where a continuation byte should be */
4689+
__m128i cont_mask = _mm_cmpeq_epi8(_mm_and_si128(operand2, find_c0), find_c0);
4690+
__m128i operand3 = _mm_or_si128(_mm_slli_si128(operand, 2), _mm_srli_si128(last_block, 14));
4691+
cont_mask = _mm_or_si128(cont_mask, _mm_cmpeq_epi8(_mm_and_si128(operand3, find_e0), find_e0));
4692+
__m128i operand4 = _mm_or_si128(_mm_slli_si128(operand, 3), _mm_srli_si128(last_block, 13));
4693+
cont_mask = _mm_or_si128(cont_mask, _mm_cmpeq_epi8(_mm_and_si128(operand4, find_f0), find_f0));
4694+
4695+
/* Now, use a signed comparison to get another bitmask with 0xFF in each position where
4696+
* a continuation byte actually is
4697+
* XOR those two bitmasks together; if everything is good, the result should be zero
4698+
* However, if a byte which should have been a continuation wasn't, or if a byte which
4699+
* shouldn't have been a continuation was, we will get 0xFF in that position */
4700+
__m128i continuation = _mm_cmplt_epi8(operand, find_c0);
4701+
bad = _mm_or_si128(bad, _mm_xor_si128(continuation, cont_mask));
4702+
4703+
/* Pick out the high bit of each byte in `bad` as a 16-bit value (into a scalar register)
4704+
* If that value is non-zero, then we found a bad byte somewhere! */
4705+
if (_mm_movemask_epi8(bad)) {
4706+
return false;
4707+
}
4708+
4709+
last_block = operand;
4710+
p += sizeof(__m128i);
4711+
}
4712+
4713+
finish_up_remaining_bytes: ;
4714+
/* Finish up 1-15 remaining bytes */
4715+
if (p == e) {
4716+
uint8_t remaining_bytes = ZSTR_LEN(str) & (sizeof(__m128i) - 1); /* Not including terminating null */
4717+
4718+
/* Crazy hack here... we want to use the above vectorized code to check a block of less than 16
4719+
* bytes, but there is no good way to read a variable number of bytes into an XMM register
4720+
* However, we know that these bytes are part of a zend_string, and a zend_string has some
4721+
* 'header' fields which occupy the memory just before its content
4722+
* And, those header fields occupy more than 16 bytes...
4723+
* So if we go back 16 bytes from the end of the zend_string content, and load 16 bytes from there,
4724+
* we may pick up some 'junk' bytes from the zend_string header fields, but we will get the 1-15
4725+
* bytes we wanted in the tail end of our XMM register, and this will never cause a segfault.
4726+
* Then, we do a left shift to get rid of the unwanted bytes
4727+
* Conveniently, the same left shift also zero-fills the tail end of the XMM register
4728+
*
4729+
* The following `switch` looks useless, but it's not
4730+
* The PSRLDQ instruction used for the 128-bit shift (a right shift of the register, i.e. a left shift in memory order) requires an immediate (literal)
4731+
* shift distance, so the compiler will choke on _mm_srli_si128(operand, shift_dist)
4732+
*/
4733+
switch (remaining_bytes) {
4734+
case 0:
4735+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 15)), 15);
4736+
goto check_operand;
4737+
case 1:
4738+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 14)), 14);
4739+
goto check_operand;
4740+
case 2:
4741+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 13)), 13);
4742+
goto check_operand;
4743+
case 3:
4744+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 12)), 12);
4745+
goto check_operand;
4746+
case 4:
4747+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 11)), 11);
4748+
goto check_operand;
4749+
case 5:
4750+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 10)), 10);
4751+
goto check_operand;
4752+
case 6:
4753+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 9)), 9);
4754+
goto check_operand;
4755+
case 7:
4756+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 8)), 8);
4757+
goto check_operand;
4758+
case 8:
4759+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 7)), 7);
4760+
goto check_operand;
4761+
case 9:
4762+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 6)), 6);
4763+
goto check_operand;
4764+
case 10:
4765+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 5)), 5);
4766+
goto check_operand;
4767+
case 11:
4768+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 4)), 4);
4769+
goto check_operand;
4770+
case 12:
4771+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 3)), 3);
4772+
goto check_operand;
4773+
case 13:
4774+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 2)), 2);
4775+
goto check_operand;
4776+
case 14:
4777+
operand = _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 1)), 1);
4778+
goto check_operand;
4779+
case 15:
4780+
/* No trailing bytes are left which need to be checked
4781+
* We get 15 because we did not include the terminating null when
4782+
* calculating `remaining_bytes`, so the value wraps around */
4783+
return true;
4784+
}
4785+
4786+
ZEND_UNREACHABLE();
4787+
}
4788+
4789+
return true;
4790+
#else
4791+
return php_mb_check_encoding(ZSTR_VAL(str), ZSTR_LEN(str), &mbfl_encoding_utf8);
4792+
#endif
4793+
}
4794+
45904795
static bool mb_check_str_encoding(zend_string *str, const mbfl_encoding *encoding)
45914796
{
45924797
if (encoding == &mbfl_encoding_utf8) {
45934798
if (GC_FLAGS(str) & IS_STR_VALID_UTF8) {
45944799
return true;
45954800
}
4596-
bool result = php_mb_check_encoding(ZSTR_VAL(str), ZSTR_LEN(str), encoding);
4801+
bool result = mb_fast_check_utf8(str);
45974802
if (result && !ZSTR_IS_INTERNED(str)) {
45984803
GC_ADD_FLAGS(str, IS_STR_VALID_UTF8);
45994804
}

ext/mbstring/tests/utf_encodings.phpt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,20 @@ $invalid = array(
813813

814814
testInvalidCodepoints($invalid, 'UTF-8');
815815

816+
// Regression test for bug in SSE2-based accelerated UTF-8 validation function
817+
$truncated16byte = [
818+
"k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc6",
819+
"k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xef",
820+
"k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xef\xbf",
821+
"k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0",
822+
"k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\xbf",
823+
"k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\xbf\xbf"
824+
];
825+
foreach ($truncated16byte as $trunc) {
826+
if (mb_check_encoding($trunc, 'UTF-8'))
827+
die("UTF-8 validation was incorrect on 16-byte string with truncated multi-byte char at end");
828+
}
829+
816830
echo "== UTF-16 ==\n";
817831

818832
testValidCodepoints("UTF-16");

ext/mysqlnd/mysqlnd.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,6 @@ PHPAPI void mysqlnd_local_infile_default(MYSQLND_CONN_DATA * conn);
194194
#define mysqlnd_ping(conn) ((conn)->data)->m->ping((conn)->data)
195195
#define mysqlnd_kill(conn, pid) ((conn)->data)->m->kill_connection((conn)->data, (pid))
196196
#define mysqlnd_refresh(conn, options) ((conn)->data)->m->refresh_server((conn)->data, (options))
197-
#define mysqlnd_shutdown(conn, level) ((conn)->data)->m->shutdown_server((conn)->data, (level))
198197
#define mysqlnd_set_character_set(conn, cs) ((conn)->data)->m->set_charset((conn)->data, (cs))
199198
#define mysqlnd_stat(conn, msg) ((conn)->data)->m->get_server_statistics(((conn)->data), (msg))
200199
#define mysqlnd_options(conn, opt, value) ((conn)->data)->m->set_client_option((conn)->data, (opt), (value))

0 commit comments

Comments
 (0)