Skip to content

Commit a65cdd9

Browse files
authored
Implement NEON-accelerated version of BLOCKCONV for lowercasing and uppercasing strings (#11161)
Since lowercasing and uppercasing are common operations for both internal and userland purposes, it makes sense to implement a NEON-accelerated version of them.
1 parent f6c0c60 commit a65cdd9

File tree

1 file changed

+27
-1
lines changed

1 file changed

+27
-1
lines changed

Zend/zend_operators.c

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@
4141
#ifdef __SSE2__
4242
#include <emmintrin.h>
4343
#endif
44+
#if defined(__aarch64__) || defined(_M_ARM64)
45+
#include <arm_neon.h>
46+
#endif
4447

4548
#if defined(ZEND_WIN32) && !defined(ZTS) && defined(_MSC_VER)
4649
/* This performance improvement of tolower() on Windows gives 10-18% on bench.php */
@@ -105,7 +108,30 @@ static _locale_t current_locale = NULL;
105108
__m128i blconv_result = _mm_add_epi8(blconv_operand, blconv_add); \
106109
_mm_storeu_si128((__m128i *)(dest), blconv_result);
107110

108-
#endif /* __SSE2__ */
111+
#elif defined(__aarch64__) || defined(_M_ARM64)
112+
/* Flag macro: defined in this AArch64 branch alongside the NEON BLOCKCONV_*
 * macros below. Presumably tested elsewhere in the file to select the
 * vectorised code path -- confirm against the callers. */
#define HAVE_BLOCKCONV
113+
114+
/* Declares, in the enclosing scope, the two constant vectors that
 * BLOCKCONV_LOAD uses to test each byte against the inclusive range
 * [start, end]:
 *   blconv_offset    - every lane holds (signed char)(SCHAR_MIN - start)
 *   blconv_threshold - every lane holds SCHAR_MIN + (end - start) + 1
 * Adding the offset to a byte (wrapping 8-bit add) maps `start` onto SCHAR_MIN,
 * so a single signed "less than threshold" comparison is true exactly for
 * bytes inside the range (assuming start <= end).
 * NOTE: `start` and `end` are expanded without parentheses, so pass simple
 * expressions such as character constants. */
#define BLOCKCONV_INIT_RANGE(start, end) \
115+
const int8x16_t blconv_offset = vdupq_n_s8((signed char)(SCHAR_MIN - start)); \
116+
const int8x16_t blconv_threshold = vdupq_n_s8(SCHAR_MIN + (end - start) + 1);
117+
118+
/* Size in bytes of one NEON vector of 8-bit lanes (int8x16_t), i.e. the amount
 * of data handled by one BLOCKCONV_LOAD / BLOCKCONV_STORE pair. */
#define BLOCKCONV_STRIDE sizeof(int8x16_t)
119+
120+
/* Declares blconv_delta in the enclosing scope: a constant vector with `delta`
 * replicated into every 8-bit lane. BLOCKCONV_STORE adds this value to the
 * bytes that BLOCKCONV_LOAD flagged as being inside the range. */
#define BLOCKCONV_INIT_DELTA(delta) \
121+
const int8x16_t blconv_delta = vdupq_n_s8(delta);
122+
123+
/* Loads one vector of bytes from `input` into blconv_operand and computes
 * blconv_mask, the per-lane result of the signed comparison
 * (operand + blconv_offset) < blconv_threshold. A lane of the mask is set
 * (all bits one, per the vcltq_s8 intrinsic) where the byte lies in the range
 * given to BLOCKCONV_INIT_RANGE, and zero otherwise.
 * Both variables are declared in the enclosing scope and are consumed by
 * BLOCKCONV_FOUND and BLOCKCONV_STORE; requires blconv_offset and
 * blconv_threshold to be in scope already. */
#define BLOCKCONV_LOAD(input) \
124+
int8x16_t blconv_operand = vld1q_s8((const int8_t*)(input)); \
125+
uint8x16_t blconv_mask = vcltq_s8(vaddq_s8(blconv_operand, blconv_offset), blconv_threshold);
126+
127+
/* Evaluates to the maximum lane of blconv_mask (horizontal max across the
 * vector): non-zero if at least one byte of the block loaded by BLOCKCONV_LOAD
 * matched the range, zero if none did. */
#define BLOCKCONV_FOUND() vmaxvq_u8(blconv_mask)
128+
129+
/* Writes the converted block to `dest`.
 * blconv_add is blconv_delta masked by blconv_mask, so it holds the delta in
 * lanes whose byte matched the range and zero in the others; adding it to
 * blconv_operand therefore changes only the matching bytes. The resulting
 * vector is stored to `dest`.
 * Requires blconv_operand, blconv_mask (from BLOCKCONV_LOAD) and blconv_delta
 * (from BLOCKCONV_INIT_DELTA) to be in scope; declares blconv_add and
 * blconv_result in the enclosing scope. */
#define BLOCKCONV_STORE(dest) \
130+
int8x16_t blconv_add = vandq_s8(vreinterpretq_s8_u8(blconv_mask), blconv_delta); \
131+
int8x16_t blconv_result = vaddq_s8(blconv_operand, blconv_add); \
132+
vst1q_s8((int8_t *)(dest), blconv_result);
133+
134+
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
109135

110136
ZEND_API const unsigned char zend_tolower_map[256] = {
111137
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,

0 commit comments

Comments
 (0)