From 14b04a89e12a74f661aedc183453742f3c8b6d04 Mon Sep 17 00:00:00 2001
From: Tony Su
Date: Wed, 15 Mar 2023 17:41:10 +0800
Subject: [PATCH] [zend_hash]: Use AVX2 instructions for better code efficiency

We prefer to use AVX2 instructions for code efficiency improvement
 1) Reduce instruction path length
    Generic x86 Instr: 16, SSE2: 6, AVX2: 4
 2) Better ICache locality and density

To enable AVX2 instructions, compile with '-mavx2' option via CFLAGS
environment variable or command line argument.

Note: '-mavx' option still leads to using SSE2 instructions.
_mm256_cmpeq_epi64() requires AVX2 (-mavx2).

Testing:
Build with and without '-mavx2', 'make TEST_PHP_ARGS=-j8 test'
presented the same test report.

Signed-off-by: Tony Su
---
 Zend/zend_hash.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c
index 93d10119519b0..9c09dfdc274a6 100644
--- a/Zend/zend_hash.c
+++ b/Zend/zend_hash.c
@@ -26,7 +26,10 @@
 # include <arm_neon.h>
 #endif
 
-#ifdef __SSE2__
+/* Prefer to use AVX2 instructions for better latency and throughput */
+#if defined(__AVX2__)
+# include <immintrin.h>
+#elif defined( __SSE2__)
 # include <mmintrin.h>
 # include <emmintrin.h>
 #endif
@@ -176,7 +179,14 @@ static zend_always_inline void zend_hash_real_init_mixed_ex(HashTable *ht)
 	HT_SET_DATA_ADDR(ht, data);
 	/* Don't overwrite iterator count. */
 	ht->u.v.flags = HASH_FLAG_STATIC_KEYS;
-#ifdef __SSE2__
+#if defined(__AVX2__)
+	do {
+		__m256i ymm0 = _mm256_setzero_si256();
+		ymm0 = _mm256_cmpeq_epi64(ymm0, ymm0);
+		_mm256_storeu_si256((__m256i*)&HT_HASH_EX(data, 0), ymm0);
+		_mm256_storeu_si256((__m256i*)&HT_HASH_EX(data, 8), ymm0);
+	} while(0);
+#elif defined (__SSE2__)
 	do {
 		__m128i xmm0 = _mm_setzero_si128();
 		xmm0 = _mm_cmpeq_epi8(xmm0, xmm0);