Skip to content

Add upper case functions to zend_operators.c and use them #7521

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 184 additions & 32 deletions Zend/zend_operators.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,36 @@ static _locale_t current_locale = NULL;

#define TYPE_PAIR(t1,t2) (((t1) << 4) | (t2))

static const unsigned char tolower_map[256] = {
#if __SSE2__
#define HAVE_BLOCKCONV
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does the BLOCKCONV prefix come from?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am introducing it here, for block (case) conversion. Feel free to suggest a better name.


/* Common code for SSE2 accelerated character case conversion */
/*
 * These macros expand to plain declarations/statements (no do { } while (0)
 * wrapper) because the blconv_* locals they declare are shared between them.
 * Consequently BLOCKCONV_INIT_RANGE, BLOCKCONV_INIT_DELTA, BLOCKCONV_LOAD and
 * BLOCKCONV_STORE may each appear at most once per scope, and must not be
 * used as the unbraced body of an if/while.
 */

/*
 * Declare the exclusive lower/upper bound vectors: every byte lane holds
 * (start) - 1 resp. (end) + 1. The comparisons in BLOCKCONV_LOAD are signed,
 * so bytes >= 0x80 are seen as negative and never fall inside an ASCII range.
 */
#define BLOCKCONV_INIT_RANGE(start, end) \
const __m128i blconv_start_minus_1 = _mm_set1_epi8((start) - 1); \
const __m128i blconv_end_plus_1 = _mm_set1_epi8((end) + 1);

/* Number of bytes handled by one BLOCKCONV_LOAD / BLOCKCONV_STORE pair. */
#define BLOCKCONV_STRIDE sizeof(__m128i)

/* Declare the vector whose every byte lane holds the value added to in-range bytes. */
#define BLOCKCONV_INIT_DELTA(delta) \
const __m128i blconv_delta = _mm_set1_epi8(delta);

/*
 * Unaligned load of BLOCKCONV_STRIDE bytes from (input) and computation of
 * blconv_mingle, a per-byte mask that is all-ones where the byte lies within
 * the range given to BLOCKCONV_INIT_RANGE and zero elsewhere.
 * Requires BLOCKCONV_INIT_RANGE earlier in an enclosing scope.
 */
#define BLOCKCONV_LOAD(input) \
__m128i blconv_operand = _mm_loadu_si128((__m128i*)(input)); \
__m128i blconv_gt = _mm_cmpgt_epi8(blconv_operand, blconv_start_minus_1); \
__m128i blconv_lt = _mm_cmplt_epi8(blconv_operand, blconv_end_plus_1); \
__m128i blconv_mingle = _mm_and_si128(blconv_gt, blconv_lt);

/* Non-zero if at least one byte of the block loaded by BLOCKCONV_LOAD is in range. */
#define BLOCKCONV_FOUND() _mm_movemask_epi8(blconv_mingle)

/*
 * Add the delta to the in-range bytes only (the mask zeroes the delta for all
 * other lanes) and write the block to (dest) with an unaligned store.
 * Requires BLOCKCONV_LOAD and BLOCKCONV_INIT_DELTA earlier in scope.
 */
#define BLOCKCONV_STORE(dest) \
__m128i blconv_add = _mm_and_si128(blconv_mingle, blconv_delta); \
__m128i blconv_result = _mm_add_epi8(blconv_operand, blconv_add); \
_mm_storeu_si128((__m128i *)(dest), blconv_result);

#endif /* __SSE2__ */

ZEND_API const unsigned char zend_tolower_map[256] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
Expand All @@ -73,7 +102,25 @@ static const unsigned char tolower_map[256] = {
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};

#define zend_tolower_ascii(c) (tolower_map[(unsigned char)(c)])
/*
 * Byte-indexed upper-casing table: entry i is the upper-case form of byte i.
 * Only 0x61..0x7a ('a'..'z') map to a different value (0x41..0x5a); every
 * other byte, including 0x80..0xff, maps to itself.
 */
ZEND_API const unsigned char zend_toupper_map[256] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
/* 0x60 stays; 0x61..0x6f ('a'..'o') -> 0x41..0x4f ('A'..'O') */
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
/* 0x70..0x7a ('p'..'z') -> 0x50..0x5a ('P'..'Z'); 0x7b..0x7f stay */
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};


/**
* Functions using locale lowercase:
Expand Down Expand Up @@ -2665,22 +2712,16 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
unsigned char *p = (unsigned char*)str;
unsigned char *q = (unsigned char*)dest;
unsigned char *end = p + length;
#ifdef __SSE2__
if (length >= 16) {
const __m128i _A = _mm_set1_epi8('A' - 1);
const __m128i Z_ = _mm_set1_epi8('Z' + 1);
const __m128i delta = _mm_set1_epi8('a' - 'A');
#ifdef HAVE_BLOCKCONV
if (length >= BLOCKCONV_STRIDE) {
BLOCKCONV_INIT_RANGE('A', 'Z');
BLOCKCONV_INIT_DELTA('a' - 'A');
do {
__m128i op = _mm_loadu_si128((__m128i*)p);
__m128i gt = _mm_cmpgt_epi8(op, _A);
__m128i lt = _mm_cmplt_epi8(op, Z_);
__m128i mingle = _mm_and_si128(gt, lt);
__m128i add = _mm_and_si128(mingle, delta);
__m128i lower = _mm_add_epi8(op, add);
_mm_storeu_si128((__m128i *)q, lower);
p += 16;
q += 16;
} while (p + 16 <= end);
BLOCKCONV_LOAD(p);
BLOCKCONV_STORE(q);
p += BLOCKCONV_STRIDE;
q += BLOCKCONV_STRIDE;
} while (p + BLOCKCONV_STRIDE <= end);
}
#endif
while (p < end) {
Expand All @@ -2689,6 +2730,28 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
}
/* }}} */

/**
 * Write the upper-cased form of str[0 .. length) to dest[0 .. length).
 *
 * Only the bytes 'a'..'z' are changed; every other byte is copied verbatim.
 * No terminating NUL is written. dest may be the same buffer as str (each
 * block/byte is read before it is written), which is how the in-place
 * zend_str_toupper() uses this helper.
 *
 * @param dest   destination buffer, at least length bytes
 * @param str    source bytes
 * @param length number of bytes to convert
 */
static zend_always_inline void zend_str_toupper_impl(char *dest, const char *str, size_t length) /* {{{ */ {
	unsigned char *p = (unsigned char*)str;
	unsigned char *q = (unsigned char*)dest;
	unsigned char *end = p + length;
#ifdef HAVE_BLOCKCONV
	if (length >= BLOCKCONV_STRIDE) {
		BLOCKCONV_INIT_RANGE('a', 'z');
		BLOCKCONV_INIT_DELTA('A' - 'a');
		do {
			BLOCKCONV_LOAD(p);
			BLOCKCONV_STORE(q);
			p += BLOCKCONV_STRIDE;
			q += BLOCKCONV_STRIDE;
			/*
			 * Test the number of bytes left instead of forming
			 * p + BLOCKCONV_STRIDE: that pointer could lie more than one
			 * element past the end of the buffer, which is undefined.
			 */
		} while ((size_t)(end - p) >= BLOCKCONV_STRIDE);
	}
#endif
	/* Scalar tail: fewer than BLOCKCONV_STRIDE bytes remain (or no SSE2 path). */
	while (p < end) {
		*q++ = zend_toupper_ascii(*p++);
	}
}
/* }}} */

ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length) /* {{{ */
{
zend_str_tolower_impl(dest, source, length);
Expand All @@ -2697,18 +2760,39 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *sourc
}
/* }}} */

ZEND_API char* ZEND_FASTCALL zend_str_toupper_copy(char *dest, const char *source, size_t length) /* {{{ */
{
	/*
	 * Upper-case `length` bytes of source into dest, then terminate the
	 * result. dest must therefore provide room for length + 1 bytes.
	 */
	char *const out = dest;

	zend_str_toupper_impl(out, source, length);
	out[length] = '\0';

	return out;
}
/* }}} */

ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length) /* {{{ */
{
	/* Fresh buffer of length + 1 bytes; zend_str_tolower_copy() fills it and returns it. */
	char *buffer = (char *)emalloc(length + 1);

	return zend_str_tolower_copy(buffer, source, length);
}
/* }}} */

ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup(const char *source, size_t length) /* {{{ */
{
	/* length bytes of payload plus the NUL that zend_str_toupper_copy() appends. */
	char *buffer = (char *)emalloc(length + 1);

	return zend_str_toupper_copy(buffer, source, length);
}
/* }}} */

ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length) /* {{{ */
{
	/* In-place conversion: the same buffer is passed as source and destination. */
	const char *input = (const char*)str;

	zend_str_tolower_impl(str, input, length);
}
/* }}} */

ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length) /* {{{ */
{
	/* In-place conversion: the same buffer is passed as source and destination. */
	const char *input = (const char*)str;

	zend_str_toupper_impl(str, input, length);
}
/* }}} */


ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t length) /* {{{ */
{
const unsigned char *p = (const unsigned char*)source;
Expand All @@ -2733,38 +2817,57 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t
}
/* }}} */

/**
 * Upper-case `source` into a new buffer, but only if that changes anything.
 *
 * @param source bytes to convert (need not be NUL-terminated)
 * @param length number of bytes in source
 * @return NULL if no byte of source differs from its zend_toupper_ascii()
 *         mapping (nothing is allocated in that case); otherwise a newly
 *         emalloc()ed, NUL-terminated buffer of length + 1 bytes holding the
 *         upper-cased copy.
 */
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length) /* {{{ */
{
	const unsigned char *p = (const unsigned char*)source;
	const unsigned char *end = p + length;

	while (p < end) {
		if (*p != zend_toupper_ascii(*p)) {
			/*
			 * Bytes before p are already upper-case. Express the position as
			 * an offset into source so the matching position in res can be
			 * derived without subtracting pointers into unrelated objects.
			 */
			size_t prefix_len = (size_t)(p - (const unsigned char*)source);
			char *res = (char*)emalloc(length + 1);

			if (prefix_len != 0) {
				memcpy(res, source, prefix_len);
			}
			zend_str_toupper_impl(res + prefix_len, (const char*)p, length - prefix_len);
			res[length] = '\0';
			return res;
		}
		p++;
	}
	return NULL;
}
/* }}} */

ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent) /* {{{ */
{
size_t length = ZSTR_LEN(str);
unsigned char *p = (unsigned char *) ZSTR_VAL(str);
unsigned char *end = p + length;

#ifdef __SSE2__
while (p + 16 <= end) {
const __m128i _A = _mm_set1_epi8('A' - 1);
const __m128i Z_ = _mm_set1_epi8('Z' + 1);
__m128i op = _mm_loadu_si128((__m128i*)p);
__m128i gt = _mm_cmpgt_epi8(op, _A);
__m128i lt = _mm_cmplt_epi8(op, Z_);
__m128i mingle = _mm_and_si128(gt, lt);
if (_mm_movemask_epi8(mingle)) {
#ifdef HAVE_BLOCKCONV
BLOCKCONV_INIT_RANGE('A', 'Z');
while (p + BLOCKCONV_STRIDE <= end) {
BLOCKCONV_LOAD(p);
if (BLOCKCONV_FOUND()) {
zend_string *res = zend_string_alloc(length, persistent);
memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char *) ZSTR_VAL(str));
unsigned char *q = p + (ZSTR_VAL(res) - ZSTR_VAL(str));

/* Lowercase the chunk we already compared. */
const __m128i delta = _mm_set1_epi8('a' - 'A');
__m128i add = _mm_and_si128(mingle, delta);
__m128i lower = _mm_add_epi8(op, add);
_mm_storeu_si128((__m128i *) q, lower);
BLOCKCONV_INIT_DELTA('a' - 'A');
BLOCKCONV_STORE(q);

/* Lowercase the rest of the string. */
p += 16; q += 16;
p += BLOCKCONV_STRIDE;
q += BLOCKCONV_STRIDE;
zend_str_tolower_impl((char *) q, (const char *) p, end - p);
ZSTR_VAL(res)[length] = '\0';
return res;
}
p += 16;
p += BLOCKCONV_STRIDE;
}
#endif

Expand All @@ -2787,6 +2890,55 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, boo
}
/* }}} */

/**
 * Return an upper-cased version of `str` ('a'..'z' only).
 *
 * The string is scanned for the first byte that needs changing. If there is
 * none, no allocation takes place and zend_string_copy(str) is returned.
 * Otherwise a string of the same length is obtained from
 * zend_string_alloc(length, persistent), the unchanged prefix is copied
 * verbatim and the remainder is upper-cased into it.
 *
 * @param str        input string; not modified
 * @param persistent forwarded to zend_string_alloc() for the result
 * @return the result string as described above
 */
ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent) /* {{{ */
{
	size_t length = ZSTR_LEN(str);
	unsigned char *p = (unsigned char *) ZSTR_VAL(str);
	unsigned char *end = p + length;

#ifdef HAVE_BLOCKCONV
	BLOCKCONV_INIT_RANGE('a', 'z');
	/*
	 * Compare the remaining byte count rather than forming
	 * p + BLOCKCONV_STRIDE, which could point more than one element past the
	 * end of the string (undefined behaviour).
	 */
	while ((size_t)(end - p) >= BLOCKCONV_STRIDE) {
		BLOCKCONV_LOAD(p);
		if (BLOCKCONV_FOUND()) {
			/*
			 * Position of the first block containing a lower-case letter,
			 * as an offset so it can be applied to the new buffer without
			 * subtracting pointers into two different allocations.
			 */
			size_t offset = (size_t)(p - (unsigned char *) ZSTR_VAL(str));
			zend_string *res = zend_string_alloc(length, persistent);
			unsigned char *q = (unsigned char *) ZSTR_VAL(res) + offset;

			memcpy(ZSTR_VAL(res), ZSTR_VAL(str), offset);

			/* Uppercase the chunk we already compared. */
			BLOCKCONV_INIT_DELTA('A' - 'a');
			BLOCKCONV_STORE(q);

			/* Uppercase the rest of the string. */
			p += BLOCKCONV_STRIDE;
			q += BLOCKCONV_STRIDE;
			zend_str_toupper_impl((char *) q, (const char *) p, end - p);
			ZSTR_VAL(res)[length] = '\0';
			return res;
		}
		p += BLOCKCONV_STRIDE;
	}
#endif

	/* Scalar scan of the remaining bytes (the whole string without SSE2). */
	while (p < end) {
		if (*p != zend_toupper_ascii(*p)) {
			size_t offset = (size_t)(p - (unsigned char *) ZSTR_VAL(str));
			zend_string *res = zend_string_alloc(length, persistent);
			unsigned char *q = (unsigned char *) ZSTR_VAL(res) + offset;

			memcpy(ZSTR_VAL(res), ZSTR_VAL(str), offset);
			while (p < end) {
				*q++ = zend_toupper_ascii(*p++);
			}
			ZSTR_VAL(res)[length] = '\0';
			return res;
		}
		p++;
	}

	/* Nothing to change: hand back the input via zend_string_copy(). */
	return zend_string_copy(str);
}
/* }}} */

ZEND_API int ZEND_FASTCALL zend_binary_strcmp(const char *s1, size_t len1, const char *s2, size_t len2) /* {{{ */
{
int retval;
Expand Down
12 changes: 12 additions & 0 deletions Zend/zend_operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,13 +433,25 @@ ZEND_API int ZEND_FASTCALL string_compare_function(zval *op1, zval *op2);
ZEND_API int ZEND_FASTCALL string_case_compare_function(zval *op1, zval *op2);
ZEND_API int ZEND_FASTCALL string_locale_compare_function(zval *op1, zval *op2);

/* Byte-indexed case-mapping tables; the definitions live in zend_operators.c. */
ZEND_API extern const unsigned char zend_tolower_map[256];
ZEND_API extern const unsigned char zend_toupper_map[256];

/* Table lookups for a single byte; the argument is cast to unsigned char before indexing. */
#define zend_tolower_ascii(c) (zend_tolower_map[(unsigned char)(c)])
#define zend_toupper_ascii(c) (zend_toupper_map[(unsigned char)(c)])

/*
 * Case-conversion helpers, declared in lower/upper pairs:
 *   zend_str_to*(str, length)         - convert length bytes of str in place.
 *   zend_str_to*_copy(dest, src, len) - convert into dest and return dest; the
 *                                       upper variant also writes dest[len] = '\0'
 *                                       (the lower variant presumably does too).
 *   zend_str_to*_dup(src, len)        - convert into a new emalloc(len + 1) buffer.
 *   zend_str_to*_dup_ex(src, len)     - like _dup, but the upper variant returns
 *                                       NULL when no byte would change
 *                                       (NOTE(review): lower variant assumed symmetric).
 *   zend_string_to*_ex(str, persist)  - zend_string flavour; the upper variant
 *                                       returns zend_string_copy(str) when nothing
 *                                       changes, otherwise a string obtained from
 *                                       zend_string_alloc(length, persistent).
 */
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length);
ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length);
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length);
ZEND_API char* ZEND_FASTCALL zend_str_toupper_copy(char *dest, const char *source, size_t length);
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length);
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup(const char *source, size_t length);
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t length);
ZEND_API char* ZEND_FASTCALL zend_str_toupper_dup_ex(const char *source, size_t length);
ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, bool persistent);
ZEND_API zend_string* ZEND_FASTCALL zend_string_toupper_ex(zend_string *str, bool persistent);

/* Convenience forms that pass 0 as the persistent flag. */
#define zend_string_tolower(str) zend_string_tolower_ex(str, 0)
#define zend_string_toupper(str) zend_string_toupper_ex(str, 0)

ZEND_API int ZEND_FASTCALL zend_binary_zval_strcmp(zval *s1, zval *s2);
ZEND_API int ZEND_FASTCALL zend_binary_zval_strncmp(zval *s1, zval *s2, zval *s3);
Expand Down
2 changes: 1 addition & 1 deletion ext/pdo_dblib/dblib_stmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ static int pdo_dblib_stmt_get_col(pdo_stmt_t *stmt, int colno, zval *zv, enum pd
tmp_data_len = 36;
tmp_data = safe_emalloc(tmp_data_len, sizeof(char), 1);
data_len = dbconvert(NULL, SQLUNIQUE, data, data_len, SQLCHAR, (LPBYTE) tmp_data, tmp_data_len);
php_strtoupper(tmp_data, data_len);
zend_str_toupper(tmp_data, data_len);
ZVAL_STRINGL(zv, tmp_data, data_len);
efree(tmp_data);
} else {
Expand Down
3 changes: 3 additions & 0 deletions ext/standard/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,9 @@ PHPAPI zend_string *php_string_toupper(zend_string *s)
unsigned char *c;
const unsigned char *e;

if (EXPECTED(!BG(ctype_string))) {
return zend_string_toupper(s);
}
c = (unsigned char *)ZSTR_VAL(s);
e = c + ZSTR_LEN(s);

Expand Down
18 changes: 18 additions & 0 deletions ext/standard/tests/strings/strtolower.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ $strings = array (
"ABCD\0abcdABCD",
TRUE,
FALSE,

/* Check for off-by-one errors in the SSE implementation */
"AAAAAAAAAAAAAAAAAAAA",
"ZZZZZZZZZZZZZZZZZZZZ",
"@@@@@@@@@@@@@@@@@@@@",
"[[[[[[[[[[[[[[[[[[[[",
);

$count = 0;
Expand Down Expand Up @@ -218,6 +224,18 @@ string(1) "1"
-- Iteration 7 --
string(0) ""

-- Iteration 8 --
string(20) "aaaaaaaaaaaaaaaaaaaa"

-- Iteration 9 --
string(20) "zzzzzzzzzzzzzzzzzzzz"

-- Iteration 10 --
string(20) "@@@@@@@@@@@@@@@@@@@@"

-- Iteration 11 --
string(20) "[[[[[[[[[[[[[[[[[[[["

*** Testing strtolower() with two different case strings ***
strings are same, with Case Insensitive
*** Done ***
Loading