Skip to content

Commit da0c705

Browse files
authored
Add upper case functions to zend_operators.c and use them (#7521)
Add a family of upper case conversion functions to zend_operators.c, by analogy with the lower case functions. Move the single-character conversion macros to the header so that they can be used as a locale-independent replacement for tolower() and toupper(). Factor out the ugly bits of the SSE2 case conversion so that the four functions that use it are easy to read and processor-independent. Use the new ASCII upper case functions in ext/xml, ext/pdo_dblib and as an optimization for strtoupper() when the locale is "C".
1 parent 9d8f97d commit da0c705

File tree

7 files changed

+236
-34
lines changed

7 files changed

+236
-34
lines changed

Zend/zend_operators.c

Lines changed: 184 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,36 @@ static _locale_t current_locale = NULL;
5454

5555
#define TYPE_PAIR(t1,t2) (((t1) << 4) | (t2))
5656

57-
static const unsigned char tolower_map[256] = {
57+
#ifdef __SSE2__
#define HAVE_BLOCKCONV

/*
 * Common code for SSE2 accelerated character case conversion.
 *
 * These macros declare and share local variables prefixed with "blconv_",
 * so they have to be expanded inside one function, in this order:
 *
 *   BLOCKCONV_INIT_RANGE(start, end)  declares the bounds of the byte range
 *                                     [start, end] that is to be converted.
 *   BLOCKCONV_INIT_DELTA(delta)       declares the value added to every byte
 *                                     that falls inside the range.
 *   BLOCKCONV_LOAD(input)             reads BLOCKCONV_STRIDE bytes (unaligned)
 *                                     and computes the "in range" byte mask.
 *   BLOCKCONV_FOUND()                 non-zero if the loaded block contains at
 *                                     least one byte inside the range.
 *   BLOCKCONV_STORE(dest)             writes BLOCKCONV_STRIDE converted bytes
 *                                     (unaligned) to dest.
 *
 * BLOCKCONV_LOAD and BLOCKCONV_STORE declare variables, so each of them may
 * appear only once per scope (typically once per loop body).
 *
 * The range test uses signed byte comparisons, so bytes >= 0x80 are seen as
 * negative values and never match an ASCII range.
 */

#define BLOCKCONV_INIT_RANGE(start, end) \
	const __m128i blconv_start_minus_1 = _mm_set1_epi8((char)((start) - 1)); \
	const __m128i blconv_end_plus_1 = _mm_set1_epi8((char)((end) + 1));

#define BLOCKCONV_STRIDE sizeof(__m128i)

#define BLOCKCONV_INIT_DELTA(delta) \
	const __m128i blconv_delta = _mm_set1_epi8((char)(delta));

#define BLOCKCONV_LOAD(input) \
	__m128i blconv_operand = _mm_loadu_si128((const __m128i*)(input)); \
	__m128i blconv_gt = _mm_cmpgt_epi8(blconv_operand, blconv_start_minus_1); \
	__m128i blconv_lt = _mm_cmplt_epi8(blconv_operand, blconv_end_plus_1); \
	__m128i blconv_mingle = _mm_and_si128(blconv_gt, blconv_lt);

#define BLOCKCONV_FOUND() _mm_movemask_epi8(blconv_mingle)

#define BLOCKCONV_STORE(dest) \
	__m128i blconv_add = _mm_and_si128(blconv_mingle, blconv_delta); \
	__m128i blconv_result = _mm_add_epi8(blconv_operand, blconv_add); \
	_mm_storeu_si128((__m128i *)(dest), blconv_result);

#endif /* __SSE2__ */
85+
86+
ZEND_API const unsigned char zend_tolower_map[256] = {
5887
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
5988
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
6089
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
@@ -73,7 +102,25 @@ static const unsigned char tolower_map[256] = {
73102
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
74103
};
75104

76-
#define zend_tolower_ascii(c) (tolower_map[(unsigned char)(c)])
105+
/* Byte-indexed ASCII upper-casing table: entries 0x61..0x7a ('a'..'z') hold
 * 0x41..0x5a ('A'..'Z'); every other index maps to itself. */
ZEND_API const unsigned char zend_toupper_map[256] = {
106+
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
107+
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
108+
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
109+
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
110+
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
111+
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
112+
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
113+
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f,
114+
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
115+
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
116+
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
117+
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
118+
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
119+
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
120+
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
121+
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
122+
};
123+
77124

78125
/**
79126
* Functions using locale lowercase:
@@ -2665,22 +2712,16 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
26652712
unsigned char *p = (unsigned char*)str;
26662713
unsigned char *q = (unsigned char*)dest;
26672714
unsigned char *end = p + length;
2668-
#ifdef __SSE2__
2669-
if (length >= 16) {
2670-
const __m128i _A = _mm_set1_epi8('A' - 1);
2671-
const __m128i Z_ = _mm_set1_epi8('Z' + 1);
2672-
const __m128i delta = _mm_set1_epi8('a' - 'A');
2715+
#ifdef HAVE_BLOCKCONV
2716+
if (length >= BLOCKCONV_STRIDE) {
2717+
BLOCKCONV_INIT_RANGE('A', 'Z');
2718+
BLOCKCONV_INIT_DELTA('a' - 'A');
26732719
do {
2674-
__m128i op = _mm_loadu_si128((__m128i*)p);
2675-
__m128i gt = _mm_cmpgt_epi8(op, _A);
2676-
__m128i lt = _mm_cmplt_epi8(op, Z_);
2677-
__m128i mingle = _mm_and_si128(gt, lt);
2678-
__m128i add = _mm_and_si128(mingle, delta);
2679-
__m128i lower = _mm_add_epi8(op, add);
2680-
_mm_storeu_si128((__m128i *)q, lower);
2681-
p += 16;
2682-
q += 16;
2683-
} while (p + 16 <= end);
2720+
BLOCKCONV_LOAD(p);
2721+
BLOCKCONV_STORE(q);
2722+
p += BLOCKCONV_STRIDE;
2723+
q += BLOCKCONV_STRIDE;
2724+
} while (p + BLOCKCONV_STRIDE <= end);
26842725
}
26852726
#endif
26862727
while (p < end) {
@@ -2689,6 +2730,28 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
26892730
}
26902731
/* }}} */
26912732

2733+
/*
 * Write the ASCII upper-case form of the `length` bytes at `str` to `dest`.
 *
 * Only the bytes 'a'..'z' are changed; all other bytes are copied verbatim.
 * No terminating NUL byte is written. `dest` may be the very same buffer as
 * `str` (in-place conversion): every block is fully read before it is written.
 */
static zend_always_inline void zend_str_toupper_impl(char *dest, const char *str, size_t length) /* {{{ */ {
	const unsigned char *p = (const unsigned char*)str;
	unsigned char *q = (unsigned char*)dest;
	const unsigned char *end = p + length;
#ifdef HAVE_BLOCKCONV
	if (length >= BLOCKCONV_STRIDE) {
		BLOCKCONV_INIT_RANGE('a', 'z');
		BLOCKCONV_INIT_DELTA('A' - 'a');
		do {
			BLOCKCONV_LOAD(p);
			BLOCKCONV_STORE(q);
			p += BLOCKCONV_STRIDE;
			q += BLOCKCONV_STRIDE;
			/* Compare the remaining length instead of computing
			 * p + BLOCKCONV_STRIDE, which could point past the
			 * one-past-the-end position of the buffer. */
		} while ((size_t)(end - p) >= BLOCKCONV_STRIDE);
	}
#endif
	/* Tail shorter than one block (or everything when no block code exists). */
	while (p < end) {
		*q++ = zend_toupper_ascii(*p++);
	}
}
/* }}} */
2754+
26922755
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length) /* {{{ */
26932756
{
26942757
zend_str_tolower_impl(dest, source, length);
@@ -2697,18 +2760,39 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *sourc
26972760
}
26982761
/* }}} */
26992762

2763+
/*
 * Store the ASCII upper-case form of `source` (`length` bytes) in `dest`,
 * followed by a NUL byte, so `dest` needs room for length + 1 bytes.
 * Returns `dest`.
 */
ZEND_API char* ZEND_FASTCALL zend_str_toupper_copy(char *dest, const char *source, size_t length) /* {{{ */
{
	char *const terminator = dest + length;

	zend_str_toupper_impl(dest, source, length);
	*terminator = '\0';

	return dest;
}
/* }}} */
2770+
27002771
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length) /* {{{ */
27012772
{
27022773
return zend_str_tolower_copy((char *)emalloc(length+1), source, length);
27032774
}
27042775
/* }}} */
27052776

2777+
/*
 * Return a buffer of length + 1 bytes obtained from emalloc() that holds the
 * NUL-terminated ASCII upper-case form of `source`.
 */
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup(const char *source, size_t length) /* {{{ */
{
	char *buffer = (char *)emalloc(length+1);

	return zend_str_toupper_copy(buffer, source, length);
}
/* }}} */
2782+
27062783
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length) /* {{{ */
27072784
{
27082785
zend_str_tolower_impl(str, (const char*)str, length);
27092786
}
27102787
/* }}} */
27112788

2789+
/*
 * Convert the `length` bytes at `str` to ASCII upper case in place.
 */
ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length) /* {{{ */
{
	const char *input = (const char*)str;

	/* Source and destination are the same buffer. */
	zend_str_toupper_impl(str, input, length);
}
/* }}} */
2794+
2795+
27122796
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t length) /* {{{ */
27132797
{
27142798
const unsigned char *p = (const unsigned char*)source;
@@ -2733,38 +2817,57 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t
27332817
}
27342818
/* }}} */
27352819

2820+
/*
 * Return an emalloc()ed, NUL-terminated ASCII upper-case copy of `source`
 * (`length` bytes), or NULL when `source` contains no byte that upper-casing
 * would change. In the NULL case nothing is allocated.
 */
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length) /* {{{ */
{
	const unsigned char *p = (const unsigned char*)source;
	const unsigned char *end = p + length;

	while (p < end) {
		if (*p != zend_toupper_ascii(*p)) {
			/* Number of leading bytes that are already upper case. Using
			 * this offset avoids subtracting pointers that belong to two
			 * different allocations (res and source). */
			size_t prefix_len = (size_t)(p - (const unsigned char*)source);
			char *res = (char*)emalloc(length + 1);

			if (prefix_len != 0) {
				memcpy(res, source, prefix_len);
			}
			/* Convert everything from the first differing byte onwards. */
			zend_str_toupper_impl(res + prefix_len, (const char*)p, length - prefix_len);
			res[length] = '\0';
			return res;
		}
		p++;
	}
	return NULL;
}
/* }}} */
2843+
27362844
ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent) /* {{{ */
27372845
{
27382846
size_t length = ZSTR_LEN(str);
27392847
unsigned char *p = (unsigned char *) ZSTR_VAL(str);
27402848
unsigned char *end = p + length;
27412849

2742-
#ifdef __SSE2__
2743-
while (p + 16 <= end) {
2744-
const __m128i _A = _mm_set1_epi8('A' - 1);
2745-
const __m128i Z_ = _mm_set1_epi8('Z' + 1);
2746-
__m128i op = _mm_loadu_si128((__m128i*)p);
2747-
__m128i gt = _mm_cmpgt_epi8(op, _A);
2748-
__m128i lt = _mm_cmplt_epi8(op, Z_);
2749-
__m128i mingle = _mm_and_si128(gt, lt);
2750-
if (_mm_movemask_epi8(mingle)) {
2850+
#ifdef HAVE_BLOCKCONV
2851+
BLOCKCONV_INIT_RANGE('A', 'Z');
2852+
while (p + BLOCKCONV_STRIDE <= end) {
2853+
BLOCKCONV_LOAD(p);
2854+
if (BLOCKCONV_FOUND()) {
27512855
zend_string *res = zend_string_alloc(length, persistent);
27522856
memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char *) ZSTR_VAL(str));
27532857
unsigned char *q = p + (ZSTR_VAL(res) - ZSTR_VAL(str));
27542858

27552859
/* Lowercase the chunk we already compared. */
2756-
const __m128i delta = _mm_set1_epi8('a' - 'A');
2757-
__m128i add = _mm_and_si128(mingle, delta);
2758-
__m128i lower = _mm_add_epi8(op, add);
2759-
_mm_storeu_si128((__m128i *) q, lower);
2860+
BLOCKCONV_INIT_DELTA('a' - 'A');
2861+
BLOCKCONV_STORE(q);
27602862

27612863
/* Lowercase the rest of the string. */
2762-
p += 16; q += 16;
2864+
p += BLOCKCONV_STRIDE;
2865+
q += BLOCKCONV_STRIDE;
27632866
zend_str_tolower_impl((char *) q, (const char *) p, end - p);
27642867
ZSTR_VAL(res)[length] = '\0';
27652868
return res;
27662869
}
2767-
p += 16;
2870+
p += BLOCKCONV_STRIDE;
27682871
}
27692872
#endif
27702873

@@ -2787,6 +2890,55 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, boo
27872890
}
27882891
/* }}} */
27892892

2893+
/*
 * Return the ASCII upper-case form of `str`.
 *
 * If `str` contains no byte in 'a'..'z', the result of zend_string_copy(str)
 * is returned and no new string is allocated. Otherwise a new string of the
 * same length is allocated with zend_string_alloc(length, persistent), the
 * unchanged prefix is copied and the remainder is converted.
 */
ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent) /* {{{ */
{
	size_t length = ZSTR_LEN(str);
	unsigned char *p = (unsigned char *) ZSTR_VAL(str);
	unsigned char *end = p + length;

#ifdef HAVE_BLOCKCONV
	BLOCKCONV_INIT_RANGE('a', 'z');
	/* Compare the remaining length rather than p + BLOCKCONV_STRIDE, which
	 * could point past the one-past-the-end position of the string. */
	while ((size_t)(end - p) >= BLOCKCONV_STRIDE) {
		BLOCKCONV_LOAD(p);
		if (BLOCKCONV_FOUND()) {
			/* Offset of the current block; used instead of subtracting
			 * pointers into two different allocations. */
			size_t offset = (size_t)(p - (unsigned char *) ZSTR_VAL(str));
			zend_string *res = zend_string_alloc(length, persistent);
			unsigned char *q = (unsigned char *) ZSTR_VAL(res) + offset;

			memcpy(ZSTR_VAL(res), ZSTR_VAL(str), offset);

			/* Uppercase the chunk we already compared. */
			BLOCKCONV_INIT_DELTA('A' - 'a');
			BLOCKCONV_STORE(q);

			/* Uppercase the rest of the string. */
			p += BLOCKCONV_STRIDE;
			q += BLOCKCONV_STRIDE;
			zend_str_toupper_impl((char *) q, (const char *) p, end - p);
			ZSTR_VAL(res)[length] = '\0';
			return res;
		}
		p += BLOCKCONV_STRIDE;
	}
#endif

	/* Byte-wise scan of the tail (or of the whole string without block code). */
	while (p < end) {
		if (*p != zend_toupper_ascii(*p)) {
			size_t offset = (size_t)(p - (unsigned char *) ZSTR_VAL(str));
			zend_string *res = zend_string_alloc(length, persistent);
			unsigned char *q = (unsigned char *) ZSTR_VAL(res) + offset;

			memcpy(ZSTR_VAL(res), ZSTR_VAL(str), offset);

			while (p < end) {
				*q++ = zend_toupper_ascii(*p++);
			}
			ZSTR_VAL(res)[length] = '\0';
			return res;
		}
		p++;
	}

	/* Nothing to convert. */
	return zend_string_copy(str);
}
/* }}} */
2941+
27902942
ZEND_API int ZEND_FASTCALL zend_binary_strcmp(const char *s1, size_t len1, const char *s2, size_t len2) /* {{{ */
27912943
{
27922944
int retval;

Zend/zend_operators.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,13 +433,25 @@ ZEND_API int ZEND_FASTCALL string_compare_function(zval *op1, zval *op2);
433433
ZEND_API int ZEND_FASTCALL string_case_compare_function(zval *op1, zval *op2);
434434
ZEND_API int ZEND_FASTCALL string_locale_compare_function(zval *op1, zval *op2);
435435

436+
/* 256-entry, byte-indexed case conversion tables. */
ZEND_API extern const unsigned char zend_tolower_map[256];
437+
ZEND_API extern const unsigned char zend_toupper_map[256];
438+
439+
/* Single-character case conversion by table lookup. The argument is cast to
 * unsigned char before indexing, so plain (possibly signed) char values can be
 * passed. Intended as a locale-independent replacement for tolower() and
 * toupper(). */
#define zend_tolower_ascii(c) (zend_tolower_map[(unsigned char)(c)])
440+
#define zend_toupper_ascii(c) (zend_toupper_map[(unsigned char)(c)])
441+
436442
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length);
443+
ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length);
437444
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length);
445+
ZEND_API char* ZEND_FASTCALL zend_str_toupper_copy(char *dest, const char *source, size_t length);
438446
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length);
447+
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup(const char *source, size_t length);
439448
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t length);
449+
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length);
440450
ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent);
451+
ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent);
441452

442453
#define zend_string_tolower(str) zend_string_tolower_ex(str, 0)
454+
#define zend_string_toupper(str) zend_string_toupper_ex(str, 0)
443455

444456
ZEND_API int ZEND_FASTCALL zend_binary_zval_strcmp(zval *s1, zval *s2);
445457
ZEND_API int ZEND_FASTCALL zend_binary_zval_strncmp(zval *s1, zval *s2, zval *s3);

ext/pdo_dblib/dblib_stmt.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ static int pdo_dblib_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *zv, enum pd
438438
tmp_data_len = 36;
439439
tmp_data = safe_emalloc(tmp_data_len, sizeof(char), 1);
440440
data_len = dbconvert(NULL, SQLUNIQUE, data, data_len, SQLCHAR, (LPBYTE) tmp_data, tmp_data_len);
441-
php_strtoupper(tmp_data, data_len);
441+
zend_str_toupper(tmp_data, data_len);
442442
ZVAL_STRINGL(zv, tmp_data, data_len);
443443
efree(tmp_data);
444444
} else {

ext/standard/string.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,6 +1362,9 @@ PHPAPI zend_string *php_string_toupper(zend_string *s)
13621362
unsigned char *c;
13631363
const unsigned char *e;
13641364

1365+
if (EXPECTED(!BG(ctype_string))) {
1366+
return zend_string_toupper(s);
1367+
}
13651368
c = (unsigned char *)ZSTR_VAL(s);
13661369
e = c + ZSTR_LEN(s);
13671370

ext/standard/tests/strings/strtolower.phpt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,12 @@ $strings = array (
4141
"ABCD\0abcdABCD",
4242
TRUE,
4343
FALSE,
44+
45+
/* Check for off-by-one errors in the SSE implementation */
46+
"AAAAAAAAAAAAAAAAAAAA",
47+
"ZZZZZZZZZZZZZZZZZZZZ",
48+
"@@@@@@@@@@@@@@@@@@@@",
49+
"[[[[[[[[[[[[[[[[[[[[",
4450
);
4551

4652
$count = 0;
@@ -218,6 +224,18 @@ string(1) "1"
218224
-- Iteration 7 --
219225
string(0) ""
220226

227+
-- Iteration 8 --
228+
string(20) "aaaaaaaaaaaaaaaaaaaa"
229+
230+
-- Iteration 9 --
231+
string(20) "zzzzzzzzzzzzzzzzzzzz"
232+
233+
-- Iteration 10 --
234+
string(20) "@@@@@@@@@@@@@@@@@@@@"
235+
236+
-- Iteration 11 --
237+
string(20) "[[[[[[[[[[[[[[[[[[[["
238+
221239
*** Testing strtolower() with two different case strings ***
222240
strings are same, with Case Insensitive
223241
*** Done ***

0 commit comments

Comments
 (0)