From db54dc383523facfddbcb860186f71d407d1a9b5 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Tue, 28 Sep 2021 09:32:37 +1000 Subject: [PATCH 1/3] Add upper case functions to zend_operators.c Add a family of upper case conversion functions to zend_operators.c, by analogy with the lower case functions. Move the single-character conversion macros to the header so that they can be used as a locale-independent replacement for tolower() and toupper(). Factor out the ugly bits of the SSE2 case conversion so that the four functions that use it are easy to read and processor-independent. --- Zend/zend_operators.c | 216 +++++++++++++++++++++++++++++++++++------- Zend/zend_operators.h | 12 +++ 2 files changed, 196 insertions(+), 32 deletions(-) diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index a6488e532920f..4d3fe72476988 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -54,7 +54,36 @@ static _locale_t current_locale = NULL; #define TYPE_PAIR(t1,t2) (((t1) << 4) | (t2)) -static const unsigned char tolower_map[256] = { +#if __SSE2__ +#define HAVE_BLOCKCONV + +/* Common code for SSE2 accelerated character case conversion */ + +#define BLOCKCONV_INIT_RANGE(start, end) \ + const __m128i blconv_start_minus_1 = _mm_set1_epi8((start) - 1); \ + const __m128i blconv_end_plus_1 = _mm_set1_epi8((end) + 1); + +#define BLOCKCONV_STRIDE sizeof(__m128i) + +#define BLOCKCONV_INIT_DELTA(delta) \ + const __m128i blconv_delta = _mm_set1_epi8(delta); + +#define BLOCKCONV_LOAD(input) \ + __m128i blconv_operand = _mm_loadu_si128((__m128i*)(input)); \ + __m128i blconv_gt = _mm_cmpgt_epi8(blconv_operand, blconv_start_minus_1); \ + __m128i blconv_lt = _mm_cmplt_epi8(blconv_operand, blconv_end_plus_1); \ + __m128i blconv_mingle = _mm_and_si128(blconv_gt, blconv_lt); + +#define BLOCKCONV_FOUND() _mm_movemask_epi8(blconv_mingle) + +#define BLOCKCONV_STORE(dest) \ + __m128i blconv_add = _mm_and_si128(blconv_mingle, blconv_delta); \ + __m128i blconv_result = _mm_add_epi8(blconv_operand, blconv_add); \ + _mm_storeu_si128((__m128i *)(dest), blconv_result); + +#endif /* __SSE2__ */ + +ZEND_API const unsigned char zend_tolower_map[256] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, @@ -73,7 +102,25 @@ static const unsigned char tolower_map[256] = { 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff }; -#define zend_tolower_ascii(c) (tolower_map[(unsigned char)(c)]) +ZEND_API const unsigned char zend_toupper_map[256] = { +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, +0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f, +0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, +0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, +0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, +0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, +0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, +0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, +0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, +0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff +}; + /** * Functions using locale lowercase: @@ -2665,22 +2712,16 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str unsigned char *p = (unsigned char*)str; unsigned char *q = (unsigned char*)dest; unsigned char *end = p + length; -#ifdef __SSE2__ - if (length >= 16) { - const __m128i _A = _mm_set1_epi8('A' - 1); - const __m128i Z_ = _mm_set1_epi8('Z' + 1); - const __m128i delta = _mm_set1_epi8('a' - 'A'); +#ifdef HAVE_BLOCKCONV + if (length >= BLOCKCONV_STRIDE) { + BLOCKCONV_INIT_RANGE('A', 'Z'); + BLOCKCONV_INIT_DELTA('a' - 'A'); do { - __m128i op = _mm_loadu_si128((__m128i*)p); - __m128i gt = _mm_cmpgt_epi8(op, _A); - __m128i lt = _mm_cmplt_epi8(op, Z_); - __m128i mingle = _mm_and_si128(gt, lt); - __m128i add = _mm_and_si128(mingle, delta); - __m128i lower = _mm_add_epi8(op, add); - _mm_storeu_si128((__m128i *)q, lower); - p += 16; - q += 16; - } while (p + 16 <= end); + BLOCKCONV_LOAD(p); + BLOCKCONV_STORE(q); + p += BLOCKCONV_STRIDE; + q += BLOCKCONV_STRIDE; + } while (p + BLOCKCONV_STRIDE <= end); } #endif while (p < end) { @@ -2689,6 +2730,28 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str } /* }}} */ +static zend_always_inline void zend_str_toupper_impl(char *dest, const char *str, size_t length) /* {{{ */ { + unsigned char *p = (unsigned char*)str; + unsigned char *q = (unsigned char*)dest; + unsigned char *end = p + length; +#ifdef HAVE_BLOCKCONV + if (length >= BLOCKCONV_STRIDE) { + BLOCKCONV_INIT_RANGE('a', 'z'); + BLOCKCONV_INIT_DELTA('A' - 'a'); + do { + BLOCKCONV_LOAD(p); + BLOCKCONV_STORE(q); + p += BLOCKCONV_STRIDE; + q += BLOCKCONV_STRIDE; + } while (p + BLOCKCONV_STRIDE <= end); + } +#endif + while (p < end) { + *q++ = zend_toupper_ascii(*p++); + } +} +/* }}} */ + ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length) /* {{{ */ { zend_str_tolower_impl(dest, source, length); @@ -2697,18 +2760,39 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *sourc } /* }}} */ +ZEND_API char* ZEND_FASTCALL zend_str_toupper_copy(char *dest, const char *source, size_t length) /* {{{ */ +{ + zend_str_toupper_impl(dest, source, length); + dest[length] = '\0'; + return dest; +} +/* }}} */ + ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length) /* {{{ */ { return zend_str_tolower_copy((char *)emalloc(length+1), source, length); } /* }}} */ +ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup(const char *source, size_t length) /* {{{ */ +{ + return zend_str_toupper_copy((char *)emalloc(length+1), source, length); +} +/* }}} */ + ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length) /* {{{ */ { zend_str_tolower_impl(str, (const char*)str, length); } /* }}} */ +ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length) /* {{{ */ +{ + zend_str_toupper_impl(str, (const char*)str, length); +} +/* }}} */ + + ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t length) /* {{{ */ { const unsigned char *p = (const unsigned char*)source; @@ -2733,38 +2817,57 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t } /* }}} */ +ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length) /* {{{ */ +{ + const unsigned char *p = (const unsigned char*)source; + const unsigned char *end = p + length; + + while (p < end) { + if (*p != zend_toupper_ascii(*p)) { + char *res = (char*)emalloc(length + 1); + unsigned char *r; + + if (p != (const unsigned char*)source) { + memcpy(res, source, p - (const unsigned char*)source); + } + r = (unsigned char*)p + (res - source); + zend_str_toupper_impl((char *)r, (const char*)p, end - p); + res[length] = '\0'; + return res; + } + p++; + } + return NULL; +} +/* }}} */ + ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent) /* {{{ */ { size_t length = ZSTR_LEN(str); unsigned char *p = (unsigned char *) ZSTR_VAL(str); unsigned char *end = p + length; -#ifdef __SSE2__ - while (p + 16 <= end) { - const __m128i _A = _mm_set1_epi8('A' - 1); - const __m128i Z_ = _mm_set1_epi8('Z' + 1); - __m128i op = _mm_loadu_si128((__m128i*)p); - __m128i gt = _mm_cmpgt_epi8(op, _A); - __m128i lt = _mm_cmplt_epi8(op, Z_); - __m128i mingle = _mm_and_si128(gt, lt); - if (_mm_movemask_epi8(mingle)) { +#ifdef HAVE_BLOCKCONV + BLOCKCONV_INIT_RANGE('A', 'Z'); + while (p + BLOCKCONV_STRIDE <= end) { + BLOCKCONV_LOAD(p); + if (BLOCKCONV_FOUND()) { zend_string *res = zend_string_alloc(length, persistent); memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char *) ZSTR_VAL(str)); unsigned char *q = p + (ZSTR_VAL(res) - ZSTR_VAL(str)); /* Lowercase the chunk we already compared. */ - const __m128i delta = _mm_set1_epi8('a' - 'A'); - __m128i add = _mm_and_si128(mingle, delta); - __m128i lower = _mm_add_epi8(op, add); - _mm_storeu_si128((__m128i *) q, lower); + BLOCKCONV_INIT_DELTA('a' - 'A'); + BLOCKCONV_STORE(q); /* Lowercase the rest of the string. */ - p += 16; q += 16; + p += BLOCKCONV_STRIDE; + q += BLOCKCONV_STRIDE; zend_str_tolower_impl((char *) q, (const char *) p, end - p); ZSTR_VAL(res)[length] = '\0'; return res; } - p += 16; + p += BLOCKCONV_STRIDE; } #endif @@ -2787,6 +2890,55 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, boo } /* }}} */ +ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent) /* {{{ */ +{ + size_t length = ZSTR_LEN(str); + unsigned char *p = (unsigned char *) ZSTR_VAL(str); + unsigned char *end = p + length; + +#ifdef HAVE_BLOCKCONV + BLOCKCONV_INIT_RANGE('a', 'z'); + while (p + BLOCKCONV_STRIDE <= end) { + BLOCKCONV_LOAD(p); + if (BLOCKCONV_FOUND()) { + zend_string *res = zend_string_alloc(length, persistent); + memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char *) ZSTR_VAL(str)); + unsigned char *q = p + (ZSTR_VAL(res) - ZSTR_VAL(str)); + + /* Uppercase the chunk we already compared. */ + BLOCKCONV_INIT_DELTA('A' - 'a'); + BLOCKCONV_STORE(q); + + /* Uppercase the rest of the string. */ + p += BLOCKCONV_STRIDE; + q += BLOCKCONV_STRIDE; + zend_str_toupper_impl((char *) q, (const char *) p, end - p); + ZSTR_VAL(res)[length] = '\0'; + return res; + } + p += BLOCKCONV_STRIDE; + } +#endif + + while (p < end) { + if (*p != zend_toupper_ascii(*p)) { + zend_string *res = zend_string_alloc(length, persistent); + memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char*) ZSTR_VAL(str)); + + unsigned char *q = p + (ZSTR_VAL(res) - ZSTR_VAL(str)); + while (p < end) { + *q++ = zend_toupper_ascii(*p++); + } + ZSTR_VAL(res)[length] = '\0'; + return res; + } + p++; + } + + return zend_string_copy(str); +} +/* }}} */ + ZEND_API int ZEND_FASTCALL zend_binary_strcmp(const char *s1, size_t len1, const char *s2, size_t len2) /* {{{ */ { int retval; diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h index c92456fb1a385..a09e821373868 100644 --- a/Zend/zend_operators.h +++ b/Zend/zend_operators.h @@ -433,13 +433,25 @@ ZEND_API int ZEND_FASTCALL string_compare_function(zval *op1, zval *op2); ZEND_API int ZEND_FASTCALL string_case_compare_function(zval *op1, zval *op2); ZEND_API int ZEND_FASTCALL string_locale_compare_function(zval *op1, zval *op2); +ZEND_API extern const unsigned char zend_tolower_map[256]; +ZEND_API extern const unsigned char zend_toupper_map[256]; + +#define zend_tolower_ascii(c) (zend_tolower_map[(unsigned char)(c)]) +#define zend_toupper_ascii(c) (zend_toupper_map[(unsigned char)(c)]) + ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length); +ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length); ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length); +ZEND_API char* ZEND_FASTCALL zend_str_toupper_copy(char *dest, const char *source, size_t length); ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length); +ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup(const char *source, size_t length); ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t length); +ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length); ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent); +ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent); #define zend_string_tolower(str) zend_string_tolower_ex(str, 0) +#define zend_string_toupper(str) zend_string_toupper_ex(str, 0) ZEND_API int ZEND_FASTCALL zend_binary_zval_strcmp(zval *s1, zval *s2); ZEND_API int ZEND_FASTCALL zend_binary_zval_strncmp(zval *s1, zval *s2, zval *s3); From 19479cbda0316c24bfdd48263de2cd5128c276c8 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Wed, 22 Sep 2021 15:38:39 +1000 Subject: [PATCH 2/3] SSE-oriented strtolower/strtoupper tests --- ext/standard/tests/strings/strtolower.phpt | 18 ++++++++++++++++++ ext/standard/tests/strings/strtoupper1.phpt | 17 +++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/ext/standard/tests/strings/strtolower.phpt b/ext/standard/tests/strings/strtolower.phpt index efea4022d05c9..37a38493ce114 100644 --- a/ext/standard/tests/strings/strtolower.phpt +++ b/ext/standard/tests/strings/strtolower.phpt @@ -41,6 +41,12 @@ $strings = array ( "ABCD\0abcdABCD", TRUE, FALSE, + + /* Check for off-by-one errors in the SSE implementation */ + "AAAAAAAAAAAAAAAAAAAA", + "ZZZZZZZZZZZZZZZZZZZZ", + "@@@@@@@@@@@@@@@@@@@@", + "[[[[[[[[[[[[[[[[[[[[", ); $count = 0; @@ -218,6 +224,18 @@ string(1) "1" -- Iteration 7 -- string(0) "" +-- Iteration 8 -- +string(20) "aaaaaaaaaaaaaaaaaaaa" + +-- Iteration 9 -- +string(20) "zzzzzzzzzzzzzzzzzzzz" + +-- Iteration 10 -- +string(20) "@@@@@@@@@@@@@@@@@@@@" + +-- Iteration 11 -- +string(20) "[[[[[[[[[[[[[[[[[[[[" + *** Testing strtolower() with two different case strings *** strings are same, with Case Insensitive *** Done *** diff --git a/ext/standard/tests/strings/strtoupper1.phpt b/ext/standard/tests/strings/strtoupper1.phpt index f0254905dc8a4..7b79f6f457be9 100644 --- a/ext/standard/tests/strings/strtoupper1.phpt +++ b/ext/standard/tests/strings/strtoupper1.phpt @@ -41,6 +41,11 @@ $strings = array ( "ABCD\0abcdABCD", TRUE, FALSE, + /* Check for off-by-one errors in the SSE implementation */ + "aaaaaaaaaaaaaaaaaaaa", + "zzzzzzzzzzzzzzzzzzzz", + "````````````````````", + "{{{{{{{{{{{{{{{{{{{{", ); $count = 0; @@ -219,6 +224,18 @@ string(1) "1" -- Iteration 7 -- string(0) "" +-- Iteration 8 -- +string(20) "AAAAAAAAAAAAAAAAAAAA" + +-- Iteration 9 -- +string(20) "ZZZZZZZZZZZZZZZZZZZZ" + +-- Iteration 10 -- +string(20) "````````````````````" + +-- Iteration 11 -- +string(20) "{{{{{{{{{{{{{{{{{{{{" + *** Testing strtoupper() with two different case strings *** strings are same, with Case Insensitive *** Done *** From 7009053e6c1b2faeab00274881c4b6dd289c45a3 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Tue, 28 Sep 2021 09:41:55 +1000 Subject: [PATCH 3/3] Use the new toupper functions Use the new ASCII upper case functions in ext/xml, ext/pdo_dblib and as an optimization for strtoupper() when the locale is "C". --- ext/pdo_dblib/dblib_stmt.c | 2 +- ext/standard/string.c | 3 +++ ext/xml/xml.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ext/pdo_dblib/dblib_stmt.c b/ext/pdo_dblib/dblib_stmt.c index 2bef466f46b00..f415a8d7f44be 100644 --- a/ext/pdo_dblib/dblib_stmt.c +++ b/ext/pdo_dblib/dblib_stmt.c @@ -438,7 +438,7 @@ static int pdo_dblib_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *zv, enum pd tmp_data_len = 36; tmp_data = safe_emalloc(tmp_data_len, sizeof(char), 1); data_len = dbconvert(NULL, SQLUNIQUE, data, data_len, SQLCHAR, (LPBYTE) tmp_data, tmp_data_len); - php_strtoupper(tmp_data, data_len); + zend_str_toupper(tmp_data, data_len); ZVAL_STRINGL(zv, tmp_data, data_len); efree(tmp_data); } else { diff --git a/ext/standard/string.c b/ext/standard/string.c index 3614e94baceab..e6a4c69a759ce 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -1362,6 +1362,9 @@ PHPAPI zend_string *php_string_toupper(zend_string *s) unsigned char *c; const unsigned char *e; + if (EXPECTED(!BG(ctype_string))) { + return zend_string_toupper(s); + } c = (unsigned char *)ZSTR_VAL(s); e = c + ZSTR_LEN(s); diff --git a/ext/xml/xml.c b/ext/xml/xml.c index 904ad305b173e..5c73837c385a1 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -611,7 +611,7 @@ static zend_string *_xml_decode_tag(xml_parser *parser, const char *tag) str = xml_utf8_decode((const XML_Char *)tag, strlen(tag), parser->target_encoding); if (parser->case_folding) { - php_strtoupper(ZSTR_VAL(str), ZSTR_LEN(str)); + zend_str_toupper(ZSTR_VAL(str), ZSTR_LEN(str)); } return str;