Skip to content

Commit 8ffbd46

Browse files
committed
Perform isupper check using SSE2 as well
Rather than vectorizing only the lowercasing step, also vectorize the check for uppercase characters, using the same SSE2 method.
1 parent 4c24545 commit 8ffbd46

File tree

1 file changed

+42
-11
lines changed

1 file changed

+42
-11
lines changed

Zend/zend_operators.c

Lines changed: 42 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2464,8 +2464,8 @@ ZEND_API void zend_update_current_locale(void) /* {{{ */
24642464
#endif
24652465

24662466
static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str, size_t length) /* {{{ */ {
2467-
register unsigned char *p = (unsigned char*)str;
2468-
register unsigned char *q = (unsigned char*)dest;
2467+
unsigned char *p = (unsigned char*)str;
2468+
unsigned char *q = (unsigned char*)dest;
24692469
unsigned char *end = p + length;
24702470
#ifdef __SSE2__
24712471
if (length >= 16) {
@@ -2537,23 +2537,54 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t
25372537

25382538
ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, int persistent) /* {{{ */
25392539
{
2540-
register unsigned char *p = (unsigned char*)ZSTR_VAL(str);
2541-
register unsigned char *end = p + ZSTR_LEN(str);
2540+
size_t length = ZSTR_LEN(str);
2541+
unsigned char *p = (unsigned char *) ZSTR_VAL(str);
2542+
unsigned char *end = p + length;
2543+
2544+
#ifdef __SSE2__
2545+
while (p + 16 <= end) {
2546+
const __m128i _A = _mm_set1_epi8('A' - 1);
2547+
const __m128i Z_ = _mm_set1_epi8('Z' + 1);
2548+
__m128i op = _mm_loadu_si128((__m128i*)p);
2549+
__m128i gt = _mm_cmpgt_epi8(op, _A);
2550+
__m128i lt = _mm_cmplt_epi8(op, Z_);
2551+
__m128i mingle = _mm_and_si128(gt, lt);
2552+
if (_mm_movemask_epi8(mingle)) {
2553+
zend_string *res = zend_string_alloc(length, persistent);
2554+
memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char *) ZSTR_VAL(str));
2555+
unsigned char *q = p + (ZSTR_VAL(res) - ZSTR_VAL(str));
2556+
2557+
/* Lowercase the chunk we already compared. */
2558+
const __m128i delta = _mm_set1_epi8('a' - 'A');
2559+
__m128i add = _mm_and_si128(mingle, delta);
2560+
__m128i lower = _mm_add_epi8(op, add);
2561+
_mm_storeu_si128((__m128i *) q, lower);
2562+
2563+
/* Lowercase the rest of the string. */
2564+
p += 16; q += 16;
2565+
zend_str_tolower_impl((char *) q, (const char *) p, end - p);
2566+
ZSTR_VAL(res)[length] = '\0';
2567+
return res;
2568+
}
2569+
p += 16;
2570+
}
2571+
#endif
2572+
25422573
while (p < end) {
25432574
if (*p != zend_tolower_ascii(*p)) {
2544-
zend_string *res = zend_string_alloc(ZSTR_LEN(str), persistent);
2545-
register unsigned char *r;
2575+
zend_string *res = zend_string_alloc(length, persistent);
2576+
memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char*) ZSTR_VAL(str));
25462577

2547-
if (p != (unsigned char*)ZSTR_VAL(str)) {
2548-
memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char*)ZSTR_VAL(str));
2578+
unsigned char *q = p + (ZSTR_VAL(res) - ZSTR_VAL(str));
2579+
while (p < end) {
2580+
*q++ = zend_tolower_ascii(*p++);
25492581
}
2550-
r = p + (ZSTR_VAL(res) - ZSTR_VAL(str));
2551-
zend_str_tolower_impl((char*)r, (const char*)p, end - p);
2552-
ZSTR_VAL(res)[ZSTR_LEN(res)] = '\0';
2582+
ZSTR_VAL(res)[length] = '\0';
25532583
return res;
25542584
}
25552585
p++;
25562586
}
2587+
25572588
return zend_string_copy(str);
25582589
}
25592590
/* }}} */

0 commit comments

Comments
 (0)