Skip to content

Commit adc3b72

Browse files
Sebastian Pop, dstogov
Sebastian Pop
authored and committed
speed up add and sub operators with overflow detection
On A72, google-benchmark measurements before and after the patch:

    --------------------------------------------------------
    Benchmark                 Time           CPU Iterations
    --------------------------------------------------------
    BM_add_before          13.3 ns       13.3 ns   52626058
    BM_sub_before          8.72 ns       8.72 ns   80259343
    BM_add_after           4.80 ns       4.80 ns  145926004
    BM_sub_after           4.80 ns       4.80 ns  145936496

Before the patch:

    fast_long_add_function:
            ldr     x1, [x1]
            ldr     x2, [x2]
            add     x3, x1, x2
            eor     x4, x1, x2
            tbz     x4, #63, .L5
    .L2:
            mov     w1, 4
            str     x3, [x0]
            str     w1, [x0, 8]
            ret
            .p2align 2
    .L5:
            eor     x4, x1, x3
            tbz     x4, #63, .L2
            scvtf   d0, x1
            scvtf   d1, x2
            mov     w1, 5
            str     w1, [x0, 8]
            fadd    d0, d0, d1
            str     d0, [x0]
            ret

With the patch:

    fast_long_add_function:
            ldr     x5, [x1]
            ldr     x6, [x2]
            adds    x5, x5, x6
            bvs     .L2
            mov     w6, 4
            str     x5, [x0]
            str     w6, [x0, 8]
            ret
    .L2:
            ldr     x1, [x1]
            mov     w3, 5
            ldr     x2, [x2]
            str     w3, [x0, 8]
            scvtf   d0, x1
            scvtf   d1, x2
            fadd    d0, d0, d1
            str     d0, [x0]
            ret

php$ ./sapi/cli/php Zend/bench.php

    Base:                         Patch:
    simple             0.091      simple             0.091
    simplecall         0.014      simplecall         0.014
    simpleucall        0.041      simpleucall        0.041
    simpleudcall       0.045      simpleudcall       0.045
    mandel             0.193      mandel             0.193
    mandel2            0.229      mandel2            0.229
    ackermann(7)       0.044      ackermann(7)       0.044
    ary(50000)         0.010      ary(50000)         0.010
    ary2(50000)        0.008      ary2(50000)        0.008
    ary3(2000)         0.096      ary3(2000)         0.095
    fibo(30)           0.149      fibo(30)           0.148
    hash1(50000)       0.016      hash1(50000)       0.016
    hash2(500)         0.020      hash2(500)         0.020
    heapsort(20000)    0.055      heapsort(20000)    0.054
    matrix(20)         0.057      matrix(20)         0.057
    nestedloop(12)     0.091      nestedloop(12)     0.091
    sieve(30)          0.032      sieve(30)          0.032
    strcat(200000)     0.010      strcat(200000)     0.010
    ------------------------      ------------------------
    Total              1.199      Total              1.197

php$ ./sapi/cli/php Zend/micro_bench.php

    Base:                                  Patch:
    empty_loop          0.051              empty_loop          0.051
    func()              0.181    0.130     func()              0.181    0.130
    undef_func()        0.186    0.135     undef_func()        0.186    0.135
    int_func()          0.116    0.064     int_func()          0.116    0.064
    $x = self::$x       0.235    0.183     $x = self::$x       0.233    0.182
    self::$x = 0        0.198    0.147     self::$x = 0        0.198    0.147
    isset(self::$x)     0.229    0.178     isset(self::$x)     0.229    0.178
    empty(self::$x)     0.231    0.180     empty(self::$x)     0.231    0.180
    $x = Foo::$x        0.144    0.093     $x = Foo::$x        0.144    0.093
    Foo::$x = 0         0.107    0.056     Foo::$x = 0         0.107    0.056
    isset(Foo::$x)      0.140    0.088     isset(Foo::$x)      0.140    0.088
    empty(Foo::$x)      0.148    0.097     empty(Foo::$x)      0.148    0.097
    self::f()           0.238    0.187     self::f()           0.238    0.187
    Foo::f()            0.209    0.158     Foo::f()            0.209    0.158
    $x = $this->x       0.123    0.072     $x = $this->x       0.123    0.072
    $this->x = 0        0.124    0.073     $this->x = 0        0.124    0.073
    $this->x += 2       0.151    0.099     $this->x += 2       0.153    0.101
    ++$this->x          0.137    0.086     ++$this->x          0.138    0.086
    --$this->x          0.137    0.086     --$this->x          0.138    0.086
    $this->x++          0.170    0.119     $this->x++          0.172    0.121
    $this->x--          0.171    0.119     $this->x--          0.172    0.121
    isset($this->x)     0.170    0.119     isset($this->x)     0.170    0.119
    empty($this->x)     0.179    0.128     empty($this->x)     0.179    0.128
    $this->f()          0.194    0.143     $this->f()          0.194    0.143
    $x = Foo::TEST      0.188    0.137     $x = Foo::TEST      0.188    0.136
    new Foo()           0.482    0.431     new Foo()           0.479    0.427
    $x = TEST           0.109    0.058     $x = TEST           0.109    0.058
    $x = $_GET          0.190    0.138     $x = $_GET          0.190    0.139
    $x = $GLOBALS['v']  0.242    0.191     $x = $GLOBALS['v']  0.242    0.191
    $x = $hash['v']     0.196    0.145     $x = $hash['v']     0.196    0.145
    $x = $str[0]        0.146    0.094     $x = $str[0]        0.145    0.094
    $x = $a ?: null     0.144    0.093     $x = $a ?: null     0.144    0.093
    $x = $f ?: tmp      0.174    0.123     $x = $f ?: tmp      0.174    0.123
    $x = $f ? $f : $a   0.153    0.101     $x = $f ? $f : $a   0.153    0.101
    $x = $f ? $f : tmp  0.148    0.097     $x = $f ? $f : tmp  0.148    0.097
    ------------------------               ------------------------
    Total               6.143              Total               6.143
1 parent cc9c5d8 commit adc3b72

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

Zend/zend_operators.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,26 @@ overflow: ZEND_ATTRIBUTE_COLD_LABEL
604604
return;
605605
overflow: ZEND_ATTRIBUTE_COLD_LABEL
606606
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2));
607+
#elif defined(HAVE_ASM_GOTO) && defined(__aarch64__)
608+
__asm__ goto(
609+
"ldr x5, [%1]\n\t"
610+
"ldr x6, [%2]\n\t"
611+
"adds x5, x5, x6\n\t"
612+
"bvs %l5\n\t"
613+
"mov w6, %3\n\t"
614+
"str x5, [%0]\n\t"
615+
"str w6, [%0, %c4]\n"
616+
:
617+
: "r"(&result->value),
618+
"r"(&op1->value),
619+
"r"(&op2->value),
620+
"n"(IS_LONG),
621+
"n"(ZVAL_OFFSETOF_TYPE)
622+
: "x5", "x6", "cc", "memory"
623+
: overflow);
624+
return;
625+
overflow: ZEND_ATTRIBUTE_COLD_LABEL
626+
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2));
607627
#elif PHP_HAVE_BUILTIN_SADDL_OVERFLOW && SIZEOF_LONG == SIZEOF_ZEND_LONG
608628
long lresult;
609629
if (UNEXPECTED(__builtin_saddl_overflow(Z_LVAL_P(op1), Z_LVAL_P(op2), &lresult))) {
@@ -694,6 +714,26 @@ overflow: ZEND_ATTRIBUTE_COLD_LABEL
694714
return;
695715
overflow: ZEND_ATTRIBUTE_COLD_LABEL
696716
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2));
717+
#elif defined(HAVE_ASM_GOTO) && defined(__aarch64__)
718+
__asm__ goto(
719+
"ldr x5, [%1]\n\t"
720+
"ldr x6, [%2]\n\t"
721+
"subs x5, x5, x6\n\t"
722+
"bvs %l5\n\t"
723+
"mov w6, %3\n\t"
724+
"str x5, [%0]\n\t"
725+
"str w6, [%0, %c4]\n"
726+
:
727+
: "r"(&result->value),
728+
"r"(&op1->value),
729+
"r"(&op2->value),
730+
"n"(IS_LONG),
731+
"n"(ZVAL_OFFSETOF_TYPE)
732+
: "x5", "x6", "cc", "memory"
733+
: overflow);
734+
return;
735+
overflow: ZEND_ATTRIBUTE_COLD_LABEL
736+
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2));
697737
#elif PHP_HAVE_BUILTIN_SSUBL_OVERFLOW && SIZEOF_LONG == SIZEOF_ZEND_LONG
698738
long lresult;
699739
if (UNEXPECTED(__builtin_ssubl_overflow(Z_LVAL_P(op1), Z_LVAL_P(op2), &lresult))) {

configure.ac

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,7 +737,11 @@ dnl Check for asm goto support
737737
AC_CACHE_CHECK([for asm goto], ac_cv__asm_goto,
738738
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
739739
int main(void) {
740+
#if defined(__x86_64__) || defined(__i386__)
740741
__asm__ goto("jmp %l0\n" :::: end);
742+
#elif defined(__aarch64__)
743+
__asm__ goto("b %l0\n" :::: end);
744+
#endif
741745
end:
742746
return 0;
743747
}

0 commit comments

Comments
 (0)