From d357c59bfb569d199276a0792ae2492d1fc87348 Mon Sep 17 00:00:00 2001
From: wxue1
Date: Wed, 19 Apr 2023 02:21:19 -0700
Subject: [PATCH] Cacheline demote to improve performance

Once code is emitted to JIT buffer, hint the hardware to demote the
corresponding cache lines to more distant level so other CPUs can access
them more quickly. This gets nearly 1% performance gain on our workload.

Signed-off-by: Xue,Wang
Signed-off-by: Tao,Su
Signed-off-by: Hu,chen
---
 Zend/zend_cpuinfo.h        | 11 +++++++++++
 ext/opcache/jit/zend_jit.c | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/Zend/zend_cpuinfo.h b/Zend/zend_cpuinfo.h
index 31e7c54e0b6f0..9d221c59e541a 100644
--- a/Zend/zend_cpuinfo.h
+++ b/Zend/zend_cpuinfo.h
@@ -258,4 +258,15 @@ static inline int zend_cpu_supports_pclmul(void) {
 }
 #endif
 
+/* __builtin_cpu_supports() accepts "cldemote" only from GCC 11 onwards */
+#if PHP_HAVE_BUILTIN_CPU_SUPPORTS && defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000)
+ZEND_NO_SANITIZE_ADDRESS
+static inline int zend_cpu_supports_cldemote(void) {
+#if PHP_HAVE_BUILTIN_CPU_INIT
+	__builtin_cpu_init();
+#endif
+	return __builtin_cpu_supports("cldemote");
+}
+#endif
+
 #endif
diff --git a/ext/opcache/jit/zend_jit.c b/ext/opcache/jit/zend_jit.c
index 231c976d0bab7..ad9ac7d8f64ad 100644
--- a/ext/opcache/jit/zend_jit.c
+++ b/ext/opcache/jit/zend_jit.c
@@ -144,6 +144,31 @@ static zend_jit_trace_info *zend_jit_get_current_trace_info(void);
 static uint32_t zend_jit_trace_find_exit_point(const void* addr);
 #endif
 
+#if ZEND_JIT_TARGET_X86 && defined(__linux__)
+# if PHP_HAVE_BUILTIN_CPU_SUPPORTS && defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000)
+#  define ZEND_JIT_SUPPORT_CLDEMOTE 1
+# else
+#  define ZEND_JIT_SUPPORT_CLDEMOTE 0
+# endif
+#endif
+
+#if ZEND_JIT_SUPPORT_CLDEMOTE
+#include <immintrin.h>
+#pragma GCC push_options
+#pragma GCC target("cldemote")
+/* non-zero when the CPU reports CLDEMOTE support; assigned in zend_jit_startup() */
+static int cpu_support_cldemote = 0;
+static inline void shared_cacheline_demote(uintptr_t start, size_t size) {
+	uintptr_t cache_line_base = start & ~0x3F;
+	do {
+		_cldemote((void *)cache_line_base);
+		/* step to the start of the next 64-byte cache line */
+		cache_line_base += 64;
+	} while (cache_line_base < start + size);
+}
+#pragma GCC pop_options
+#endif
+
 static int zend_jit_assign_to_variable(dasm_State **Dst,
                                        const zend_op *opline,
                                        zend_jit_addr var_use_addr,
@@ -973,6 +998,12 @@ static void *dasm_link_and_encode(dasm_State **dasm_state,
 
 	/* flush the hardware I-cache */
 	JIT_CACHE_FLUSH(entry, entry + size);
+	/* hint to the hardware to push out the cache line that contains the linear address */
+#if ZEND_JIT_SUPPORT_CLDEMOTE
+	if (cpu_support_cldemote && JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) {
+		shared_cacheline_demote((uintptr_t)entry, size);
+	}
+#endif
 
 	if (trace_num) {
 		zend_jit_trace_add_code(entry, dasm_getpclabel(dasm_state, 1));
@@ -4901,6 +4932,10 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached)
 	zend_jit_gdb_init();
 #endif
 
+#if ZEND_JIT_SUPPORT_CLDEMOTE
+	cpu_support_cldemote = zend_cpu_supports_cldemote();
+#endif
+
 #ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP
 	zend_write_protect = pthread_jit_write_protect_supported_np();
 #endif