Skip to content

Commit 053f026

Browse files
Merge branch 'php:master' into master
2 parents a42d498 + ce86137 commit 053f026

File tree

5 files changed

+121
-15
lines changed

5 files changed

+121
-15
lines changed

Zend/Optimizer/dfa_pass.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1647,7 +1647,7 @@ void zend_dfa_optimize_op_array(zend_op_array *op_array, zend_optimizer_ctx *ctx
16471647
&& Z_TYPE_P(CT_CONSTANT_EX(op_array, opline->op2.constant)) == IS_LONG
16481648
&& Z_LVAL_P(CT_CONSTANT_EX(op_array, opline->op2.constant)) == 1
16491649
&& ssa->ops[op_1].op1_use >= 0
1650-
&& !(ssa->var_info[ssa->ops[op_1].op1_use].type & (MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF))) {
1650+
&& !(ssa->var_info[ssa->ops[op_1].op1_use].type & (MAY_BE_UNDEF|MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_STRING|MAY_BE_ARRAY|MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_REF))) {
16511651

16521652
// op_1: ASSIGN_SUB #?.CV [undef,null,int,float] -> #v.CV, int(1) => PRE_DEC #?.CV ->#v.CV
16531653

ext/mbstring/mbstring.c

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,13 +1715,85 @@ PHP_FUNCTION(mb_str_split)
17151715
}
17161716
}
17171717

1718+
#ifdef __SSE2__
1719+
/* Thanks to StackOverflow user 'Paul R' (https://stackoverflow.com/users/253056/paul-r)
1720+
* From: https://stackoverflow.com/questions/36998538/fastest-way-to-horizontally-sum-sse-unsigned-byte-vector
1721+
* Takes a 128-bit XMM register, treats each byte as an 8-bit integer, and sums up all
1722+
* 16 of them, returning the sum in an ordinary scalar register */
1723+
static inline uint32_t _mm_sum_epu8(const __m128i v)
1724+
{
1725+
/* We don't have any dedicated instruction to sum up 8-bit values from a 128-bit register
1726+
* _mm_sad_epu8 takes the differences between corresponding bytes of two different XMM registers,
1727+
* sums up those differences, and stores them as two 16-bit integers in the top and bottom
1728+
* halves of the destination XMM register
1729+
* By using a zeroed-out XMM register as one operand, we ensure the "differences" which are
1730+
* summed up will actually just be the 8-bit values from `v` */
1731+
__m128i vsum = _mm_sad_epu8(v, _mm_setzero_si128());
1732+
/* If _mm_sad_epu8 had stored the sum of those bytes as a single integer, we would just have
1733+
* to extract it here; but it stored the sum as two different 16-bit values
1734+
* _mm_cvtsi128_si32 extracts one of those values into a scalar register
1735+
* _mm_extract_epi16 extracts the other one into another scalar register; then we just add them */
1736+
return _mm_cvtsi128_si32(vsum) + _mm_extract_epi16(vsum, 4);
1737+
}
1738+
#endif
1739+
1740+
/* This assumes that `string` is valid UTF-8
1741+
* In UTF-8, the only bytes which do not start a new codepoint are 0x80-0xBF (continuation bytes)
1742+
* Interpreted as signed integers, those are all byte values less than -64
1743+
* A fast way to get the length of a UTF-8 string is to start with its byte length,
1744+
* then subtract off the number of continuation bytes */
1745+
static size_t mb_fast_strlen_utf8(unsigned char *p, size_t len)
1746+
{
1747+
unsigned char *e = p + len;
1748+
1749+
#ifdef __SSE2__
1750+
if (len >= sizeof(__m128i)) {
1751+
const __m128i threshold = _mm_set1_epi8(-64);
1752+
const __m128i delta = _mm_set1_epi8(1);
1753+
__m128i counter = _mm_set1_epi8(0); /* Vector of 16 continuation-byte counters */
1754+
1755+
int reset_counter = 255;
1756+
do {
1757+
__m128i operand = _mm_loadu_si128((__m128i*)p); /* Load 16 bytes */
1758+
__m128i lt = _mm_cmplt_epi8(operand, threshold); /* Find all which are continuation bytes */
1759+
counter = _mm_add_epi8(counter, _mm_and_si128(lt, delta)); /* Update the 16 counters */
1760+
1761+
/* The counters can only go up to 255, so every 255 iterations, fold them into `len`
1762+
* and reset them to zero */
1763+
if (--reset_counter == 0) {
1764+
len -= _mm_sum_epu8(counter);
1765+
counter = _mm_set1_epi8(0);
1766+
reset_counter = 255;
1767+
}
1768+
1769+
p += sizeof(__m128i);
1770+
} while (p + sizeof(__m128i) <= e);
1771+
1772+
len -= _mm_sum_epu8(counter); /* Fold in any remaining non-zero values in the 16 counters */
1773+
}
1774+
#endif
1775+
1776+
/* Check for continuation bytes in the 0-15 remaining bytes at the end of the string */
1777+
while (p < e) {
1778+
signed char c = *p++;
1779+
if (c < -64) {
1780+
len--;
1781+
}
1782+
}
1783+
1784+
return len;
1785+
}
1786+
17181787
static size_t mb_get_strlen(zend_string *string, const mbfl_encoding *encoding)
17191788
{
17201789
unsigned int char_len = encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2 | MBFL_ENCTYPE_WCS4);
17211790
if (char_len) {
17221791
return ZSTR_LEN(string) / char_len;
1792+
} else if (php_mb_is_no_encoding_utf8(encoding->no_encoding) && GC_FLAGS(string) & IS_STR_VALID_UTF8) {
1793+
return mb_fast_strlen_utf8((unsigned char*)ZSTR_VAL(string), ZSTR_LEN(string));
17231794
}
17241795

1796+
17251797
uint32_t wchar_buf[128];
17261798
unsigned char *in = (unsigned char*)ZSTR_VAL(string);
17271799
size_t in_len = ZSTR_LEN(string);
@@ -1789,14 +1861,7 @@ static unsigned char* offset_to_pointer_utf8(unsigned char *str, unsigned char *
17891861
}
17901862

17911863
static size_t pointer_to_offset_utf8(unsigned char *start, unsigned char *pos) {
1792-
size_t result = 0;
1793-
while (pos > start) {
1794-
unsigned char c = *--pos;
1795-
if (c < 0x80 || (c & 0xC0) != 0x80) {
1796-
result++;
1797-
}
1798-
}
1799-
return result;
1864+
return mb_fast_strlen_utf8(start, pos - start);
18001865
}
18011866

18021867
static size_t mb_find_strpos(zend_string *haystack, zend_string *needle, const mbfl_encoding *enc, ssize_t offset, bool reverse)

ext/mbstring/tests/mb_strlen.phpt

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,26 @@ mb_internal_encoding('JIS') or print("mb_internal_encoding() failed\n");
6262
print strlen($jis) . "\n";
6363

6464
echo "== UTF-8 ==\n";
65-
$utf8 = mb_convert_encoding($euc_jp, 'UTF-8','EUC-JP');
66-
print mb_strlen($utf8,'UTF-8') . "\n";
67-
mb_internal_encoding('UTF-8') or print("mb_internal_encoding() failed\n");
68-
print strlen($utf8) . "\n";
65+
$utf8 = mb_convert_encoding($euc_jp, 'UTF-8', 'EUC-JP');
66+
print mb_strlen($utf8,'UTF-8') . " codepoints\n";
67+
mb_internal_encoding('UTF-8') or print("mb_internal_encoding() failed\n");
68+
print strlen($utf8) . " bytes\n";
69+
70+
$utf8 = "abcde あいうえお 汉字 ελληνικά";
71+
$long_utf8 = str_repeat($utf8, 100);
72+
print mb_strlen($utf8, 'UTF-8') . "\n";
73+
print mb_strlen($long_utf8, 'UTF-8') . "\n";
74+
75+
echo "== UTF-8 with performance optimizations ==\n";
76+
// Optimized mb_strlen can be used on UTF-8 strings after they are checked for validity
77+
mb_check_encoding($utf8);
78+
mb_check_encoding($long_utf8);
79+
print mb_strlen($utf8, 'UTF-8') . "\n";
80+
print mb_strlen($long_utf8, 'UTF-8') . "\n";
81+
82+
$str = str_repeat('Σ', 2048); // 2-byte UTF-8 character
83+
mb_check_encoding($str, 'UTF-8');
84+
print mb_strlen($str, 'UTF-8') . "\n";
6985

7086
// Wrong Parameters
7187
echo "== WRONG PARAMETERS ==\n";
@@ -110,7 +126,13 @@ try {
110126
43
111127
90
112128
== UTF-8 ==
113-
43
114-
101
129+
43 codepoints
130+
101 bytes
131+
23
132+
2300
133+
== UTF-8 with performance optimizations ==
134+
23
135+
2300
136+
2048
115137
== WRONG PARAMETERS ==
116138
mb_strlen(): Argument #2 ($encoding) must be a valid encoding, "BAD_NAME" given

ext/opcache/jit/zend_jit_trace.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7104,6 +7104,7 @@ static zend_jit_trace_stop zend_jit_compile_root_trace(zend_jit_trace_rec *trace
71047104
if (t->stack_map_size) {
71057105
zend_jit_trace_stack *shared_stack_map = (zend_jit_trace_stack*)zend_shared_alloc(t->stack_map_size * sizeof(zend_jit_trace_stack));
71067106
if (!shared_stack_map) {
7107+
efree(t->stack_map);
71077108
ret = ZEND_JIT_TRACE_STOP_NO_SHM;
71087109
goto exit;
71097110
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
--TEST--
2+
ASSIGN_OP 002: Incorrect optimization of ASSIGN_OP may lead to incorrect result (sub assign -> pre dec conversion for null values)
3+
--INI--
4+
opcache.enable=1
5+
opcache.enable_cli=1
6+
opcache.optimization_level=-1
7+
--FILE--
8+
<?php
9+
function foo(int $a = null) {
10+
$a -= 1;
11+
return $a;
12+
}
13+
var_dump(foo(2));
14+
var_dump(foo(null));
15+
?>
16+
--EXPECT--
17+
int(1)
18+
int(-1)

0 commit comments

Comments
 (0)