Skip to content

Commit 547e053

Browse files
committed
Add upper case functions to zend_operators.c
Add a family of upper case conversion functions to zend_operators.c, by analogy with the lower case functions. Move the single-character conversion macros to the header so that they can be used as a locale-independent replacement for tolower() and toupper(). Factor out the ugly bits of the SSE2 case conversion so that the four functions that use it are easy to read and processor-independent.
1 parent 25c2642 commit 547e053

File tree

2 files changed

+198
-32
lines changed

2 files changed

+198
-32
lines changed

Zend/zend_operators.c

Lines changed: 186 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,38 @@ static _locale_t current_locale = NULL;
5454

5555
#define TYPE_PAIR(t1,t2) (((t1) << 4) | (t2))
5656

57-
static const unsigned char tolower_map[256] = {
57+
#ifdef __SSE2__
#define HAVE_BLOCKCONV

/*
 * Common code for SSE2 accelerated character case conversion.
 *
 * These macros share a set of blconv_* local variables and each expands to
 * several declarations/statements. Use every one of them as a complete
 * statement inside a braced block, never as the unbraced body of an
 * if/else/loop.
 *
 *   BLOCKCONV_INIT_RANGE(start, end)  declare the inclusive byte range to match
 *   BLOCKCONV_INIT_DELTA(delta)       declare the value added to matching bytes
 *   BLOCKCONV_STRIDE                  number of bytes handled per LOAD/STORE
 *   BLOCKCONV_LOAD(input)             load one block and compute the match mask
 *   BLOCKCONV_FOUND()                 non-zero if any loaded byte is in range
 *   BLOCKCONV_STORE(dest)             store the block with delta added to the
 *                                     matching bytes, other bytes unchanged
 *
 * The range test uses signed 8-bit comparisons, so start and end must lie in
 * the 7-bit ASCII range. Bytes with the high bit set then compare as negative
 * and are never treated as being inside the range.
 */

/* Requires: nothing. Declares the range bounds and the LOAD scratch values. */
#define BLOCKCONV_INIT_RANGE(start, end) \
	const __m128i blconv_start_minus_1 = _mm_set1_epi8((start) - 1); \
	const __m128i blconv_end_plus_1 = _mm_set1_epi8((end) + 1); \
	__m128i blconv_operand, blconv_gt, blconv_lt, blconv_mingle;

#define BLOCKCONV_STRIDE sizeof(__m128i)

/* Requires: nothing. Declares the delta and the STORE scratch values. */
#define BLOCKCONV_INIT_DELTA(delta) \
	const __m128i blconv_delta = _mm_set1_epi8((delta)); \
	__m128i blconv_add, blconv_result;

/* Requires BLOCKCONV_INIT_RANGE. Unaligned load; input is only read. */
#define BLOCKCONV_LOAD(input) \
	blconv_operand = _mm_loadu_si128((const __m128i *)(input)); \
	blconv_gt = _mm_cmpgt_epi8(blconv_operand, blconv_start_minus_1); \
	blconv_lt = _mm_cmplt_epi8(blconv_operand, blconv_end_plus_1); \
	blconv_mingle = _mm_and_si128(blconv_gt, blconv_lt);

/* Requires a preceding BLOCKCONV_LOAD. One mask bit per byte that is in range. */
#define BLOCKCONV_FOUND() _mm_movemask_epi8(blconv_mingle)

/* Requires BLOCKCONV_INIT_DELTA and a preceding BLOCKCONV_LOAD. Unaligned store. */
#define BLOCKCONV_STORE(dest) \
	blconv_add = _mm_and_si128(blconv_mingle, blconv_delta); \
	blconv_result = _mm_add_epi8(blconv_operand, blconv_add); \
	_mm_storeu_si128((__m128i *)(dest), blconv_result);

#endif /* __SSE2__ */
87+
88+
ZEND_API const unsigned char zend_tolower_map[256] = {
5889
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
5990
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
6091
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
@@ -73,7 +104,25 @@ static const unsigned char tolower_map[256] = {
73104
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
74105
};
75106

76-
#define zend_tolower_ascii(c) (tolower_map[(unsigned char)(c)])
107+
/*
 * Byte-to-byte upper case table: entry i is the upper case form of byte i.
 * Only 0x61..0x7a ('a'..'z') are remapped, to 0x41..0x5a ('A'..'Z'); every
 * other entry maps to itself.
 */
ZEND_API const unsigned char zend_toupper_map[256] = {
	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, /* 0x00 */
	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, /* 0x10 */
	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, /* 0x20 */
	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, /* 0x30 */
	0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, /* 0x40 */
	0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, /* 0x50 */
	0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, /* 0x60: 'a'..'o' -> 'A'..'O' */
	0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f, /* 0x70: 'p'..'z' -> 'P'..'Z' */
	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, /* 0x80 */
	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, /* 0x90 */
	0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, /* 0xa0 */
	0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, /* 0xb0 */
	0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, /* 0xc0 */
	0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, /* 0xd0 */
	0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, /* 0xe0 */
	0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff  /* 0xf0 */
};
125+
77126

78127
/**
79128
* Functions using locale lowercase:
@@ -2665,22 +2714,16 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
26652714
unsigned char *p = (unsigned char*)str;
26662715
unsigned char *q = (unsigned char*)dest;
26672716
unsigned char *end = p + length;
2668-
#ifdef __SSE2__
2669-
if (length >= 16) {
2670-
const __m128i _A = _mm_set1_epi8('A' - 1);
2671-
const __m128i Z_ = _mm_set1_epi8('Z' + 1);
2672-
const __m128i delta = _mm_set1_epi8('a' - 'A');
2717+
#ifdef HAVE_BLOCKCONV
2718+
if (length >= BLOCKCONV_STRIDE) {
2719+
BLOCKCONV_INIT_RANGE('A', 'Z');
2720+
BLOCKCONV_INIT_DELTA('a' - 'A');
26732721
do {
2674-
__m128i op = _mm_loadu_si128((__m128i*)p);
2675-
__m128i gt = _mm_cmpgt_epi8(op, _A);
2676-
__m128i lt = _mm_cmplt_epi8(op, Z_);
2677-
__m128i mingle = _mm_and_si128(gt, lt);
2678-
__m128i add = _mm_and_si128(mingle, delta);
2679-
__m128i lower = _mm_add_epi8(op, add);
2680-
_mm_storeu_si128((__m128i *)q, lower);
2681-
p += 16;
2682-
q += 16;
2683-
} while (p + 16 <= end);
2722+
BLOCKCONV_LOAD(p);
2723+
BLOCKCONV_STORE(q);
2724+
p += BLOCKCONV_STRIDE;
2725+
q += BLOCKCONV_STRIDE;
2726+
} while (p + BLOCKCONV_STRIDE <= end);
26842727
}
26852728
#endif
26862729
while (p < end) {
@@ -2689,6 +2732,28 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
26892732
}
26902733
/* }}} */
26912734

2735+
/*
 * Write the ASCII upper case form of the `length` bytes at `str` to `dest`.
 * No terminator is written. zend_str_toupper() calls this with dest == str,
 * so the conversion must work in place.
 */
static zend_always_inline void zend_str_toupper_impl(char *dest, const char *str, size_t length) /* {{{ */ {
	unsigned char *in = (unsigned char *) str;
	unsigned char *out = (unsigned char *) dest;
	unsigned char *in_end = in + length;

#ifdef HAVE_BLOCKCONV
	if (length >= BLOCKCONV_STRIDE) {
		/* Number of whole blocks; the remainder is handled bytewise below. */
		size_t blocks = length / BLOCKCONV_STRIDE;
		BLOCKCONV_INIT_RANGE('a', 'z');
		BLOCKCONV_INIT_DELTA('A' - 'a');

		for (; blocks > 0; blocks--) {
			BLOCKCONV_LOAD(in);
			BLOCKCONV_STORE(out);
			in += BLOCKCONV_STRIDE;
			out += BLOCKCONV_STRIDE;
		}
	}
#endif

	/* Tail (or the whole input when block conversion is unavailable). */
	for (; in < in_end; in++, out++) {
		*out = zend_toupper_ascii(*in);
	}
}
2755+
/* }}} */
2756+
26922757
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length) /* {{{ */
26932758
{
26942759
zend_str_tolower_impl(dest, source, length);
@@ -2697,18 +2762,39 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *sourc
26972762
}
26982763
/* }}} */
26992764

2765+
/*
 * Upper-case `length` bytes of `source` into `dest` and NUL-terminate it.
 * `dest` must have room for length + 1 bytes. Returns `dest`.
 */
ZEND_API char* ZEND_FASTCALL zend_str_toupper_copy(char *dest, const char *source, size_t length) /* {{{ */
{
	char *terminator = dest + length;

	zend_str_toupper_impl(dest, source, length);
	*terminator = '\0';

	return dest;
}
2771+
/* }}} */
2772+
27002773
/*
 * Allocate length + 1 bytes with emalloc() and return the result of
 * zend_str_tolower_copy() from `source` into that buffer.
 */
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length) /* {{{ */
{
	char *buffer = (char *) emalloc(length + 1);

	return zend_str_tolower_copy(buffer, source, length);
}
27042777
/* }}} */
27052778

2779+
/*
 * Allocate length + 1 bytes with emalloc() and return the NUL-terminated
 * upper case copy of `source` written there by zend_str_toupper_copy().
 */
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup(const char *source, size_t length) /* {{{ */
{
	char *buffer = (char *) emalloc(length + 1);

	return zend_str_toupper_copy(buffer, source, length);
}
2783+
/* }}} */
2784+
27062785
/* Lower-case the first `length` bytes of `str` in place. */
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length) /* {{{ */
{
	const char *source = str;

	/* Source and destination are the same buffer. */
	zend_str_tolower_impl(str, source, length);
}
27102789
/* }}} */
27112790

2791+
/* Upper-case the first `length` bytes of `str` in place. */
ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length) /* {{{ */
{
	const char *source = str;

	/* Source and destination are the same buffer. */
	zend_str_toupper_impl(str, source, length);
}
2795+
/* }}} */
2796+
2797+
27122798
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t length) /* {{{ */
27132799
{
27142800
const unsigned char *p = (const unsigned char*)source;
@@ -2733,38 +2819,57 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t
27332819
}
27342820
/* }}} */
27352821

2822+
/*
 * Return an upper case copy of `source` only if one is needed.
 *
 * Scans `length` bytes of `source` for the first byte that
 * zend_toupper_ascii() would change. If there is none, returns NULL and
 * allocates nothing. Otherwise returns a new emalloc()'d, NUL-terminated
 * buffer of length + 1 bytes holding the upper-cased string.
 */
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length) /* {{{ */
{
	const unsigned char *p = (const unsigned char*)source;
	const unsigned char *end = p + length;

	while (p < end) {
		if (*p != zend_toupper_ascii(*p)) {
			/*
			 * Offset of the first byte that changes. Derive the output
			 * position from this offset rather than from (res - source):
			 * subtracting pointers into two unrelated objects is undefined.
			 */
			size_t prefix = (size_t)(p - (const unsigned char*)source);
			char *res = (char*)emalloc(length + 1);

			/* Bytes before the first change are copied unmodified. */
			if (prefix != 0) {
				memcpy(res, source, prefix);
			}
			zend_str_toupper_impl(res + prefix, (const char*)p, length - prefix);
			res[length] = '\0';
			return res;
		}
		p++;
	}
	return NULL;
}
2844+
/* }}} */
2845+
27362846
ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent) /* {{{ */
27372847
{
27382848
size_t length = ZSTR_LEN(str);
27392849
unsigned char *p = (unsigned char *) ZSTR_VAL(str);
27402850
unsigned char *end = p + length;
27412851

2742-
#ifdef __SSE2__
2743-
while (p + 16 <= end) {
2744-
const __m128i _A = _mm_set1_epi8('A' - 1);
2745-
const __m128i Z_ = _mm_set1_epi8('Z' + 1);
2746-
__m128i op = _mm_loadu_si128((__m128i*)p);
2747-
__m128i gt = _mm_cmpgt_epi8(op, _A);
2748-
__m128i lt = _mm_cmplt_epi8(op, Z_);
2749-
__m128i mingle = _mm_and_si128(gt, lt);
2750-
if (_mm_movemask_epi8(mingle)) {
2852+
#ifdef HAVE_BLOCKCONV
2853+
BLOCKCONV_INIT_RANGE('A', 'Z');
2854+
while (p + BLOCKCONV_STRIDE <= end) {
2855+
BLOCKCONV_LOAD(p);
2856+
if (BLOCKCONV_FOUND()) {
27512857
zend_string *res = zend_string_alloc(length, persistent);
27522858
memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char *) ZSTR_VAL(str));
27532859
unsigned char *q = p + (ZSTR_VAL(res) - ZSTR_VAL(str));
27542860

27552861
/* Lowercase the chunk we already compared. */
2756-
const __m128i delta = _mm_set1_epi8('a' - 'A');
2757-
__m128i add = _mm_and_si128(mingle, delta);
2758-
__m128i lower = _mm_add_epi8(op, add);
2759-
_mm_storeu_si128((__m128i *) q, lower);
2862+
BLOCKCONV_INIT_DELTA('a' - 'A');
2863+
BLOCKCONV_STORE(q);
27602864

27612865
/* Lowercase the rest of the string. */
2762-
p += 16; q += 16;
2866+
p += BLOCKCONV_STRIDE;
2867+
q += BLOCKCONV_STRIDE;
27632868
zend_str_tolower_impl((char *) q, (const char *) p, end - p);
27642869
ZSTR_VAL(res)[length] = '\0';
27652870
return res;
27662871
}
2767-
p += 16;
2872+
p += BLOCKCONV_STRIDE;
27682873
}
27692874
#endif
27702875

@@ -2787,6 +2892,55 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, boo
27872892
}
27882893
/* }}} */
27892894

2895+
/*
 * Return the ASCII upper case form of `str`.
 *
 * If no byte of `str` is changed by upper-casing, the result is
 * zend_string_copy(str) and no new string is allocated. Otherwise a new
 * string of the same length is obtained from
 * zend_string_alloc(length, persistent), the unchanged prefix is copied
 * verbatim, the remainder is upper-cased, and a terminating NUL is stored.
 */
ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent) /* {{{ */
{
	size_t length = ZSTR_LEN(str);
	unsigned char *p = (unsigned char *) ZSTR_VAL(str);
	unsigned char *end = p + length;

#ifdef HAVE_BLOCKCONV
	BLOCKCONV_INIT_RANGE('a', 'z');
	/*
	 * Compare the remaining length instead of forming p + BLOCKCONV_STRIDE,
	 * which could point more than one past the end of the buffer.
	 */
	while ((size_t) (end - p) >= BLOCKCONV_STRIDE) {
		BLOCKCONV_LOAD(p);
		if (BLOCKCONV_FOUND()) {
			/*
			 * Position in the result is computed from the offset inside
			 * str; subtracting pointers into the two separate strings
			 * would be undefined behaviour.
			 */
			size_t prefix = (size_t) (p - (unsigned char *) ZSTR_VAL(str));
			zend_string *res = zend_string_alloc(length, persistent);
			unsigned char *q = (unsigned char *) ZSTR_VAL(res) + prefix;

			memcpy(ZSTR_VAL(res), ZSTR_VAL(str), prefix);

			/* Uppercase the chunk we already compared. */
			BLOCKCONV_INIT_DELTA('A' - 'a');
			BLOCKCONV_STORE(q);

			/* Uppercase the rest of the string. */
			p += BLOCKCONV_STRIDE;
			q += BLOCKCONV_STRIDE;
			zend_str_toupper_impl((char *) q, (const char *) p, end - p);
			ZSTR_VAL(res)[length] = '\0';
			return res;
		}
		p += BLOCKCONV_STRIDE;
	}
#endif

	/* Bytewise scan of whatever the block loop did not cover. */
	while (p < end) {
		if (*p != zend_toupper_ascii(*p)) {
			size_t prefix = (size_t) (p - (unsigned char *) ZSTR_VAL(str));
			zend_string *res = zend_string_alloc(length, persistent);
			unsigned char *q = (unsigned char *) ZSTR_VAL(res) + prefix;

			memcpy(ZSTR_VAL(res), ZSTR_VAL(str), prefix);
			while (p < end) {
				*q++ = zend_toupper_ascii(*p++);
			}
			ZSTR_VAL(res)[length] = '\0';
			return res;
		}
		p++;
	}

	/* Nothing to change. */
	return zend_string_copy(str);
}
2942+
/* }}} */
2943+
27902944
ZEND_API int ZEND_FASTCALL zend_binary_strcmp(const char *s1, size_t len1, const char *s2, size_t len2) /* {{{ */
27912945
{
27922946
int retval;

Zend/zend_operators.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,13 +433,25 @@ ZEND_API int ZEND_FASTCALL string_compare_function(zval *op1, zval *op2);
433433
ZEND_API int ZEND_FASTCALL string_case_compare_function(zval *op1, zval *op2);
434434
ZEND_API int ZEND_FASTCALL string_locale_compare_function(zval *op1, zval *op2);
435435

436+
/* 256-entry byte translation tables indexed by unsigned char value. */
ZEND_API extern const unsigned char zend_tolower_map[256];
437+
ZEND_API extern const unsigned char zend_toupper_map[256];
438+
439+
/*
 * Single-character case conversion by table lookup. The argument is cast
 * to unsigned char before indexing, so any char value is a valid input.
 */
#define zend_tolower_ascii(c) (zend_tolower_map[(unsigned char)(c)])
440+
#define zend_toupper_ascii(c) (zend_toupper_map[(unsigned char)(c)])
441+
436442
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length);
443+
ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length);
437444
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length);
445+
ZEND_API char* ZEND_FASTCALL zend_str_toupper_copy(char *dest, const char *source, size_t length);
438446
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length);
447+
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup(const char *source, size_t length);
439448
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t length);
449+
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length);
440450
ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent);
451+
ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent);
441452

442453
#define zend_string_tolower(str) zend_string_tolower_ex(str, 0)
454+
#define zend_string_toupper(str) zend_string_toupper_ex(str, 0)
443455

444456
ZEND_API int ZEND_FASTCALL zend_binary_zval_strcmp(zval *s1, zval *s2);
445457
ZEND_API int ZEND_FASTCALL zend_binary_zval_strncmp(zval *s1, zval *s2, zval *s3);

0 commit comments

Comments
 (0)