Skip to content

Commit 36a16c7

Browse files
author
Sebastian Pop
committed
[aarch64] speed up add and sub operators with overflow detection
On A72, google-benchmark measurements before and after the patch:

--------------------------------------------------------
Benchmark                 Time           CPU Iterations
--------------------------------------------------------
BM_add_before          13.3 ns       13.3 ns   52626058
BM_sub_before          8.72 ns       8.72 ns   80259343
BM_add_after           4.80 ns       4.80 ns  145926004
BM_sub_after           4.80 ns       4.80 ns  145936496

Before the patch:

fast_long_add_function:
	ldr	x1, [x1]
	ldr	x2, [x2]
	add	x3, x1, x2
	eor	x4, x1, x2
	tbz	x4, #63, .L5
.L2:
	mov	w1, 4
	str	x3, [x0]
	str	w1, [x0, 8]
	ret
	.p2align 2
.L5:
	eor	x4, x1, x3
	tbz	x4, #63, .L2
	scvtf	d0, x1
	scvtf	d1, x2
	mov	w1, 5
	str	w1, [x0, 8]
	fadd	d0, d0, d1
	str	d0, [x0]
	ret

With the patch:

fast_long_add_function:
	ldr	x5, [x1]
	ldr	x6, [x2]
	adds	x5, x5, x6
	bvs	.L2
	mov	w6, 4
	str	x5, [x0]
	str	w6, [x0, 8]
	ret
.L2:
	ldr	x1, [x1]
	mov	w3, 5
	ldr	x2, [x2]
	str	w3, [x0, 8]
	scvtf	d0, x1
	scvtf	d1, x2
	fadd	d0, d0, d1
	str	d0, [x0]
	ret

php$ ./sapi/cli/php Zend/bench.php

Base:                        Patch:
simple             0.091     simple             0.091
simplecall         0.014     simplecall         0.014
simpleucall        0.041     simpleucall        0.041
simpleudcall       0.045     simpleudcall       0.045
mandel             0.193     mandel             0.193
mandel2            0.229     mandel2            0.229
ackermann(7)       0.044     ackermann(7)       0.044
ary(50000)         0.010     ary(50000)         0.010
ary2(50000)        0.008     ary2(50000)        0.008
ary3(2000)         0.096     ary3(2000)         0.095
fibo(30)           0.149     fibo(30)           0.148
hash1(50000)       0.016     hash1(50000)       0.016
hash2(500)         0.020     hash2(500)         0.020
heapsort(20000)    0.055     heapsort(20000)    0.054
matrix(20)         0.057     matrix(20)         0.057
nestedloop(12)     0.091     nestedloop(12)     0.091
sieve(30)          0.032     sieve(30)          0.032
strcat(200000)     0.010     strcat(200000)     0.010
------------------------     ------------------------
Total              1.199     Total              1.197

php$ ./sapi/cli/php Zend/micro_bench.php

Base:                                 Patch:
empty_loop         0.051              empty_loop         0.051
func()             0.181    0.130     func()             0.181    0.130
undef_func()       0.186    0.135     undef_func()       0.186    0.135
int_func()         0.116    0.064     int_func()         0.116    0.064
$x = self::$x      0.235    0.183     $x = self::$x      0.233    0.182
self::$x = 0       0.198    0.147     self::$x = 0       0.198    0.147
isset(self::$x)    0.229    0.178     isset(self::$x)    0.229    0.178
empty(self::$x)    0.231    0.180     empty(self::$x)    0.231    0.180
$x = Foo::$x       0.144    0.093     $x = Foo::$x       0.144    0.093
Foo::$x = 0        0.107    0.056     Foo::$x = 0        0.107    0.056
isset(Foo::$x)     0.140    0.088     isset(Foo::$x)     0.140    0.088
empty(Foo::$x)     0.148    0.097     empty(Foo::$x)     0.148    0.097
self::f()          0.238    0.187     self::f()          0.238    0.187
Foo::f()           0.209    0.158     Foo::f()           0.209    0.158
$x = $this->x      0.123    0.072     $x = $this->x      0.123    0.072
$this->x = 0       0.124    0.073     $this->x = 0       0.124    0.073
$this->x += 2      0.151    0.099     $this->x += 2      0.153    0.101
++$this->x         0.137    0.086     ++$this->x         0.138    0.086
--$this->x         0.137    0.086     --$this->x         0.138    0.086
$this->x++         0.170    0.119     $this->x++         0.172    0.121
$this->x--         0.171    0.119     $this->x--         0.172    0.121
isset($this->x)    0.170    0.119     isset($this->x)    0.170    0.119
empty($this->x)    0.179    0.128     empty($this->x)    0.179    0.128
$this->f()         0.194    0.143     $this->f()         0.194    0.143
$x = Foo::TEST     0.188    0.137     $x = Foo::TEST     0.188    0.136
new Foo()          0.482    0.431     new Foo()          0.479    0.427
$x = TEST          0.109    0.058     $x = TEST          0.109    0.058
$x = $_GET         0.190    0.138     $x = $_GET         0.190    0.139
$x = $GLOBALS['v'] 0.242    0.191     $x = $GLOBALS['v'] 0.242    0.191
$x = $hash['v']    0.196    0.145     $x = $hash['v']    0.196    0.145
$x = $str[0]       0.146    0.094     $x = $str[0]       0.145    0.094
$x = $a ?: null    0.144    0.093     $x = $a ?: null    0.144    0.093
$x = $f ?: tmp     0.174    0.123     $x = $f ?: tmp     0.174    0.123
$x = $f ? $f : $a  0.153    0.101     $x = $f ? $f : $a  0.153    0.101
$x = $f ? $f : tmp 0.148    0.097     $x = $f ? $f : tmp 0.148    0.097
------------------------              ------------------------
Total              6.143              Total              6.143
1 parent fc42ac2 commit 36a16c7

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

Zend/zend_operators.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,29 @@ overflow: ZEND_ATTRIBUTE_COLD_LABEL
604604
return;
605605
overflow: ZEND_ATTRIBUTE_COLD_LABEL
606606
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2));
607+
#elif defined(HAVE_ASM_GOTO) && defined(__aarch64__)
608+
register int64_t tmp1 asm("x5");
609+
register int64_t tmp2 asm("x6");
610+
__asm__ goto(
611+
"ldr %5, [%1]\n\t"
612+
"ldr %6, [%2]\n\t"
613+
"adds %5, %5, %6\n\t"
614+
"bvs %l7\n\t"
615+
"mov w6, %3\n\t"
616+
"str %5, [%0]\n\t"
617+
"str w6, [%0, %c4]\n"
618+
:
619+
: "r"(&result->value),
620+
"r"(&op1->value),
621+
"r"(&op2->value),
622+
"n"(IS_LONG),
623+
"n"(ZVAL_OFFSETOF_TYPE),
624+
"r"(tmp1), "r"(tmp2)
625+
: "cc", "memory"
626+
: overflow);
627+
return;
628+
overflow: ZEND_ATTRIBUTE_COLD_LABEL
629+
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2));
607630
#elif PHP_HAVE_BUILTIN_SADDL_OVERFLOW && SIZEOF_LONG == SIZEOF_ZEND_LONG
608631
long lresult;
609632
if (UNEXPECTED(__builtin_saddl_overflow(Z_LVAL_P(op1), Z_LVAL_P(op2), &lresult))) {
@@ -694,6 +717,29 @@ overflow: ZEND_ATTRIBUTE_COLD_LABEL
694717
return;
695718
overflow: ZEND_ATTRIBUTE_COLD_LABEL
696719
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2));
720+
#elif defined(HAVE_ASM_GOTO) && defined(__aarch64__)
721+
register int64_t tmp1 asm("x5");
722+
register int64_t tmp2 asm("x6");
723+
__asm__ goto(
724+
"ldr %5, [%1]\n\t"
725+
"ldr %6, [%2]\n\t"
726+
"subs %5, %5, %6\n\t"
727+
"bvs %l7\n\t"
728+
"mov w6, %3\n\t"
729+
"str %5, [%0]\n\t"
730+
"str w6, [%0, %c4]\n"
731+
:
732+
: "r"(&result->value),
733+
"r"(&op1->value),
734+
"r"(&op2->value),
735+
"n"(IS_LONG),
736+
"n"(ZVAL_OFFSETOF_TYPE),
737+
"r"(tmp1), "r"(tmp2)
738+
: "cc", "memory"
739+
: overflow);
740+
return;
741+
overflow: ZEND_ATTRIBUTE_COLD_LABEL
742+
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2));
697743
#elif PHP_HAVE_BUILTIN_SSUBL_OVERFLOW && SIZEOF_LONG == SIZEOF_ZEND_LONG
698744
long lresult;
699745
if (UNEXPECTED(__builtin_ssubl_overflow(Z_LVAL_P(op1), Z_LVAL_P(op2), &lresult))) {

configure.ac

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,11 @@ dnl Check for asm goto support
738738
AC_CACHE_CHECK([for asm goto], ac_cv__asm_goto,
739739
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
740740
int main(void) {
741+
#if defined(__x86_64__) || defined(__i386__)
741742
__asm__ goto("jmp %l0\n" :::: end);
743+
#elif defined(__aarch64__)
744+
__asm__ goto("b %l0\n" :::: end);
745+
#endif
742746
end:
743747
return 0;
744748
}

0 commit comments

Comments
 (0)