Skip to content

Commit a46954f

Browse files
committed
Optimize HT_HASH_RESET
Commit d835de1 added AVX2 support to the hash table initialization code. HT_HASH_RESET contains the same kind of code, but it was overlooked in that patch. That is unfortunate, because a loop is exactly where this SIMD sequence is likely to give the most benefit. Furthermore, the NEON special handling that exists in the initialization code is also missing from HT_HASH_RESET, so add it as well.
1 parent 7de83e2 commit a46954f

File tree

1 file changed

+36
-1
lines changed

1 file changed

+36
-1
lines changed

Zend/zend_types.h

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@
3131
# include <mmintrin.h>
3232
# include <emmintrin.h>
3333
#endif
34+
#if defined(__AVX2__)
35+
# include <immintrin.h>
36+
#endif
37+
#if defined(__aarch64__) || defined(_M_ARM64)
38+
# include <arm_neon.h>
39+
#endif
3440

3541
#ifdef WORDS_BIGENDIAN
3642
# define ZEND_ENDIAN_LOHI(lo, hi) hi; lo;
@@ -460,7 +466,21 @@ struct _zend_array {
460466
HT_PACKED_SIZE_EX((ht)->nTableSize, (ht)->nTableMask)
461467
/* Sum of HT_HASH_SIZE() for the table's mask and one zval for each of the
 * ht->nNumUsed slots. nNumUsed is cast to size_t first so the multiplication
 * is carried out in size_t. */
#define HT_PACKED_USED_SIZE(ht) \
462468
(HT_HASH_SIZE((ht)->nTableMask) + ((size_t)(ht)->nNumUsed * sizeof(zval)))
463-
#ifdef __SSE2__
469+
#if defined(__AVX2__)
470+
/* AVX2 variant of HT_HASH_RESET: overwrite HT_HASH_SIZE(ht->nTableMask) bytes,
 * starting at &HT_HASH(ht, ht->nTableMask), with all-ones bits, 64 bytes per
 * loop iteration.
 * Presumably this is the same byte pattern the memset() fallback produces from
 * HT_INVALID_IDX; that constant is defined outside this view -- TODO confirm.
 * NOTE(review): the expansion declares locals named `p` and `size`, so an
 * argument expression that itself mentions `p` or `size` would be captured. */
# define HT_HASH_RESET(ht) do { \
471+
char *p = (char*)&HT_HASH(ht, (ht)->nTableMask); \
472+
size_t size = HT_HASH_SIZE((ht)->nTableMask); \
473+
__m256i ymm0 = _mm256_setzero_si256(); \
474+
ymm0 = _mm256_cmpeq_epi64(ymm0, ymm0); /* x == x in every lane: all 256 bits set */ \
475+
ZEND_ASSERT(size >= 64 && ((size & 0x3f) == 0)); /* loop below needs a non-zero multiple of 64 */ \
476+
do { \
477+
_mm256_storeu_si256((__m256i*)p, ymm0); /* unaligned store, bytes 0..31 */ \
478+
_mm256_storeu_si256((__m256i*)(p+32), ymm0); /* unaligned store, bytes 32..63 */ \
479+
p += 64; \
480+
size -= 64; \
481+
} while (size != 0); /* body runs before the test and stops only at exactly 0 */ \
482+
} while (0)
483+
#elif defined(__SSE2__)
464484
# define HT_HASH_RESET(ht) do { \
465485
char *p = (char*)&HT_HASH(ht, (ht)->nTableMask); \
466486
size_t size = HT_HASH_SIZE((ht)->nTableMask); \
@@ -476,6 +496,21 @@ struct _zend_array {
476496
size -= 64; \
477497
} while (size != 0); \
478498
} while (0)
499+
#elif defined(__aarch64__) || defined(_M_ARM64)
500+
/* AArch64 NEON variant of HT_HASH_RESET: overwrite HT_HASH_SIZE(ht->nTableMask)
 * bytes, starting at &HT_HASH(ht, ht->nTableMask), with all-ones bits, 64 bytes
 * per loop iteration.
 * Presumably this is the same byte pattern the memset() fallback produces from
 * HT_INVALID_IDX; that constant is defined outside this view -- TODO confirm.
 * NOTE(review): the expansion declares locals named `p`, `size` and `t`, so an
 * argument expression that itself mentions one of them would be captured. */
# define HT_HASH_RESET(ht) do { \
501+
char *p = (char*)&HT_HASH(ht, (ht)->nTableMask); \
502+
size_t size = HT_HASH_SIZE((ht)->nTableMask); \
503+
int32x4_t t = vdupq_n_s32(-1); /* four 32-bit lanes, each -1 (all bits set) */ \
504+
ZEND_ASSERT(size >= 64 && ((size & 0x3f) == 0)); /* loop below needs a non-zero multiple of 64 */ \
505+
do { \
506+
vst1q_s32((int32_t*)p, t); /* bytes 0..15 */ \
507+
vst1q_s32((int32_t*)(p+16), t); /* bytes 16..31 */ \
508+
vst1q_s32((int32_t*)(p+32), t); /* bytes 32..47 */ \
509+
vst1q_s32((int32_t*)(p+48), t); /* bytes 48..63 */ \
510+
p += 64; \
511+
size -= 64; \
512+
} while (size != 0); /* body runs before the test and stops only at exactly 0 */ \
513+
} while (0)
479514
#else
480515
/* Fallback HT_HASH_RESET used when none of the SIMD variants above is selected:
 * a single memset() over HT_HASH_SIZE(ht->nTableMask) bytes starting at
 * &HT_HASH(ht, ht->nTableMask). memset() converts its fill argument to
 * unsigned char, so only the low byte of HT_INVALID_IDX is written to each
 * byte. */
# define HT_HASH_RESET(ht) \
481516
memset(&HT_HASH(ht, (ht)->nTableMask), HT_INVALID_IDX, HT_HASH_SIZE((ht)->nTableMask))

0 commit comments

Comments
 (0)