Skip to content

Commit 63f6968

Browse files
committed
use SIMD
1 parent 9a715bb commit 63f6968

File tree

1 file changed

+51
-1
lines changed

1 file changed

+51
-1
lines changed

ext/bcmath/libbcmath/src/doaddsub.c

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,57 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
166166
}
167167

168168
/* Now do the equal length scale and integer parts. */
169-
for (count = 0; count < min_len + min_scale; count++) {
169+
count = 0;
170+
if (min_len + min_scale >= sizeof(BC_UINT_T)) {
171+
diffptr++;
172+
n1ptr++;
173+
n2ptr++;
174+
while (count + sizeof(BC_UINT_T) <= min_len + min_scale) {
175+
diffptr -= sizeof(BC_UINT_T);
176+
n1ptr -= sizeof(BC_UINT_T);
177+
n2ptr -= sizeof(BC_UINT_T);
178+
179+
BC_UINT_T n1bytes;
180+
BC_UINT_T n2bytes;
181+
memcpy(&n1bytes, n1ptr, sizeof(n1bytes));
182+
memcpy(&n2bytes, n2ptr, sizeof(n2bytes));
183+
184+
#if BC_LITTLE_ENDIAN
185+
/* Swap bytes so that the last (least significant) digit ends up in the low-order byte. */
186+
n1bytes = BC_BSWAP(n1bytes);
187+
n2bytes = BC_BSWAP(n2bytes);
188+
#endif
189+
190+
n1bytes -= n2bytes + borrow;
191+
/* If the most significant bit of the word is set, the subtraction underflowed, i.e. a borrow out of this chunk has occurred. */
192+
bool tmp_borrow = n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1));
193+
194+
/*
195+
* For each byte whose top bit (0x80) is set, subtract 6 from that byte.
196+
* A set top bit means that digit went negative and borrowed from the next more significant
197+
* byte. A binary borrow leaves the low nibble 16 higher, whereas a decimal borrow adds only
198+
* 10, so the difference of 6 is subtracted.
199+
* The upper 4 bits of every byte are also cleared.
200+
*/
201+
BC_UINT_T borrow_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0x06;
202+
n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask;
203+
204+
#if BC_LITTLE_ENDIAN
205+
/* Swap bytes back to the in-memory digit order before storing. */
206+
n1bytes = BC_BSWAP(n1bytes);
207+
#endif
208+
209+
memcpy(diffptr, &n1bytes, sizeof(n1bytes));
210+
211+
borrow = tmp_borrow;
212+
count += sizeof(BC_UINT_T);
213+
}
214+
diffptr--;
215+
n1ptr--;
216+
n2ptr--;
217+
}
218+
219+
for (; count < min_len + min_scale; count++) {
170220
val = *n1ptr-- - *n2ptr-- - borrow;
171221
if (val < 0) {
172222
val += BASE;

0 commit comments

Comments
 (0)