Skip to content

Commit c69b476

Browse files
author
wxue1
committed
cacheline demote to improve cache performance
After the JITTed code is generated, cacheline demote would give a hint to hardware to push out the cache line that contains the linear address. This gets nearly 1% performance gain on our workload. Signed-off-by: Xue,Wang <xue1.wang@intel.com> Signed-off-by: Tao,Su <tao.su@intel.com> Signed-off-by: Hu,chen <hu1.chen@intel.com>
1 parent 964f494 commit c69b476

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

Zend/zend_cpuinfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,4 +220,15 @@ static inline int zend_cpu_supports_pclmul(void) {
220220
}
221221
#endif
222222

223+
/* __builtin_cpu_supports has cldemote from gcc11 */
224+
#if PHP_HAVE_BUILTIN_CPU_SUPPORTS && defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000)
225+
ZEND_NO_SANITIZE_ADDRESS
226+
static inline int zend_cpu_supports_cldemote(void) {
227+
#if PHP_HAVE_BUILTIN_CPU_INIT
228+
__builtin_cpu_init();
229+
#endif
230+
return __builtin_cpu_supports("cldemote");
231+
}
232+
#endif
233+
223234
#endif

ext/opcache/jit/zend_jit.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,31 @@ static zend_jit_trace_info *zend_jit_get_current_trace_info(void);
137137
static uint32_t zend_jit_trace_find_exit_point(const void* addr);
138138
#endif
139139

140+
#if ZEND_JIT_TARGET_X86 && defined(__linux__)
141+
# if defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000)
142+
# define ZEND_JIT_SUPPORT_CLDEMOTE 1
143+
# else
144+
# define ZEND_JIT_SUPPORT_CLDEMOTE 0
145+
# endif
146+
#endif
147+
148+
#if ZEND_JIT_SUPPORT_CLDEMOTE
149+
#include <immintrin.h>
150+
#pragma GCC push_options
151+
#pragma GCC target("cldemote")
152+
// check cldemote by CPUID when JIT startup
153+
static int cpu_support_cldemote = 0;
154+
static inline void shared_cacheline_demote(uintptr_t start, size_t size) {
155+
uintptr_t cache_line_base = start & ~0x3F;
156+
do {
157+
_cldemote((void *)cache_line_base);
158+
// next cacheline start size
159+
cache_line_base += 64;
160+
} while (cache_line_base < start + size);
161+
}
162+
#pragma GCC pop_options
163+
#endif
164+
140165
static int zend_jit_assign_to_variable(dasm_State **Dst,
141166
const zend_op *opline,
142167
zend_jit_addr var_use_addr,
@@ -972,6 +997,13 @@ static void *dasm_link_and_encode(dasm_State **dasm_state,
972997
/* flush the hardware I-cache */
973998
JIT_CACHE_FLUSH(entry, entry + size);
974999

1000+
/* hint to the hardware to push out the cache line that contains the linear address */
1001+
#if ZEND_JIT_SUPPORT_CLDEMOTE
1002+
if (cpu_support_cldemote && JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) {
1003+
shared_cacheline_demote((uintptr_t)entry, size);
1004+
}
1005+
#endif
1006+
9751007
if (trace_num) {
9761008
zend_jit_trace_add_code(entry, dasm_getpclabel(dasm_state, 1));
9771009
}
@@ -4931,6 +4963,10 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached)
49314963
}
49324964
#endif
49334965

4966+
#if ZEND_JIT_SUPPORT_CLDEMOTE
4967+
cpu_support_cldemote = zend_cpu_supports_cldemote();
4968+
#endif
4969+
49344970
dasm_buf = buf;
49354971
dasm_size = size;
49364972

0 commit comments

Comments
 (0)