diff --git a/ext/bcmath/libbcmath/src/convert.c b/ext/bcmath/libbcmath/src/convert.c
index 953dde27bb13a..01617c0662c64 100644
--- a/ext/bcmath/libbcmath/src/convert.c
+++ b/ext/bcmath/libbcmath/src/convert.c
@@ -16,16 +16,11 @@
 
 #include "bcmath.h"
 #include "convert.h"
+#include "private.h"
 #ifdef __SSE2__
 # include <emmintrin.h>
 #endif
 
-/* This will be 0x01010101 for 32-bit and 0x0101010101010101 */
-#define SWAR_ONES (~((size_t) 0) / 0xFF)
-/* This repeats a byte `x` into an entire 32/64-bit word.
- * Example: SWAR_REPEAT(0xAB) will be 0xABABABAB for 32-bit and 0xABABABABABABABAB for 64-bit. */
-#define SWAR_REPEAT(x) (SWAR_ONES * (x))
-
 static char *bc_copy_and_shift_numbers(char *restrict dest, const char *source, const char *source_end, unsigned char shift, bool add)
 {
 	size_t bulk_shift = SWAR_REPEAT(shift);
diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c
index 650321d7ff6f4..eb2f7b6645d7a 100644
--- a/ext/bcmath/libbcmath/src/doaddsub.c
+++ b/ext/bcmath/libbcmath/src/doaddsub.c
@@ -124,17 +124,19 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
 bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
 {
 	bc_num diff;
-	size_t diff_scale, diff_len;
-	size_t min_scale, min_len;
-	size_t borrow, count;
+	/* Callers pass the larger number as n1, so n1->n_len >= n2->n_len is the expected case. */
+	size_t diff_len = EXPECTED(n1->n_len >= n2->n_len) ? n1->n_len : n2->n_len;
+	size_t diff_scale = MAX(n1->n_scale, n2->n_scale);
+	/* Same condition as above, but wrapping it in EXPECTED again would make it slower. */
+	size_t min_len = n1->n_len >= n2->n_len ? n2->n_len : n1->n_len;
+	size_t min_scale = MIN(n1->n_scale, n2->n_scale);
+	size_t min_bytes = min_len + min_scale;
+	size_t borrow = 0;
+	size_t count;
 	int val;
 	char *n1ptr, *n2ptr, *diffptr;
 
 	/* Allocate temporary storage. */
-	diff_len = MAX(n1->n_len, n2->n_len);
-	diff_scale = MAX(n1->n_scale, n2->n_scale);
-	min_len = MIN(n1->n_len, n2->n_len);
-	min_scale = MIN(n1->n_scale, n2->n_scale);
 	diff = bc_new_num (diff_len, MAX(diff_scale, scale_min));
 
 	/* Initialize the subtract. */
@@ -142,9 +144,6 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
 	n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1);
 	diffptr = (char *) (diff->n_value + diff_len + diff_scale - 1);
 
-	/* Subtract the numbers. */
-	borrow = 0;
-
 	/* Take care of the longer scaled number. */
 	if (n1->n_scale != min_scale) {
 		/* n1 has the longer scale */
@@ -166,7 +165,59 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
 	}
 
 	/* Now do the equal length scale and integer parts. */
-	for (count = 0; count < min_len + min_scale; count++) {
+	count = 0;
+	/* SWAR fast path: subtract one BC_UINT_T worth of digits per iteration while enough bytes remain. */
+	if (min_bytes >= sizeof(BC_UINT_T)) {
+		diffptr++;
+		n1ptr++;
+		n2ptr++;
+		while (count + sizeof(BC_UINT_T) <= min_bytes) {
+			diffptr -= sizeof(BC_UINT_T);
+			n1ptr -= sizeof(BC_UINT_T);
+			n2ptr -= sizeof(BC_UINT_T);
+
+			BC_UINT_T n1bytes;
+			BC_UINT_T n2bytes;
+			memcpy(&n1bytes, n1ptr, sizeof(n1bytes));
+			memcpy(&n2bytes, n2ptr, sizeof(n2bytes));
+
+#if BC_LITTLE_ENDIAN
+			/* Swap so the least significant digit (last byte in memory) lands in the lowest byte of the word. */
+			n1bytes = BC_BSWAP(n1bytes);
+			n2bytes = BC_BSWAP(n2bytes);
+#endif
+
+			n1bytes -= n2bytes + borrow;
+			/* If the most significant bit is set, this word needs a borrow from the next (more significant) word. */
+			bool tmp_borrow = n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1));
+
+			/*
+			 * A set top bit in a byte means that byte borrowed from its neighbour. Such a byte
+			 * wrapped modulo 16 in its low nibble instead of modulo 10, so its low 4 bits are 6
+			 * too large for a decimal digit (e.g. 3 - 5 gives 0xFE, low nibble 14, expected 8).
+			 * Therefore clear the upper 4 bits of every byte and subtract 6 from the low 4 bits
+			 * of each byte that borrowed, leaving one correct decimal digit per byte.
+			 */
+			BC_UINT_T borrow_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0x06;
+			n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask;
+
+#if BC_LITTLE_ENDIAN
+			/* Swap back to the in-memory digit order before storing. */
+			n1bytes = BC_BSWAP(n1bytes);
+#endif
+
+			memcpy(diffptr, &n1bytes, sizeof(n1bytes));
+
+			borrow = tmp_borrow;
+			count += sizeof(BC_UINT_T);
+		}
+		diffptr--;
+		n1ptr--;
+		n2ptr--;
+	}
+
+	/* Handle the remaining bytes (fewer than sizeof(BC_UINT_T)) one digit at a time. */
+	for (; count < min_bytes; count++) {
 		val = *n1ptr-- - *n2ptr-- - borrow;
 		if (val < 0) {
 			val += BASE;
diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h
index 481a651128320..f21bef665f954 100644
--- a/ext/bcmath/libbcmath/src/private.h
+++ b/ext/bcmath/libbcmath/src/private.h
@@ -34,6 +34,68 @@
 #include <stdbool.h>
 #include <stddef.h>
 
+/* This will be 0x01010101 for 32-bit and 0x0101010101010101 for 64-bit */
+#define SWAR_ONES (~((size_t) 0) / 0xFF)
+/* This repeats a byte `x` into an entire 32/64-bit word.
+ * Example: SWAR_REPEAT(0xAB) will be 0xABABABAB for 32-bit and 0xABABABABABABABAB for 64-bit. */
+#define SWAR_REPEAT(x) (SWAR_ONES * (x))
+
+/* Byte swap: use compiler intrinsics when available, otherwise the portable fallbacks below. */
+#if defined(_MSC_VER)
+#  include <stdlib.h>
+#  define BSWAP32(u) _byteswap_ulong(u)
+#  define BSWAP64(u) _byteswap_uint64(u)
+#else
+#  ifdef __has_builtin
+#    if __has_builtin(__builtin_bswap32)
+#      define BSWAP32(u) __builtin_bswap32(u)
+#    endif // __has_builtin(__builtin_bswap32)
+#    if __has_builtin(__builtin_bswap64)
+#      define BSWAP64(u) __builtin_bswap64(u)
+#    endif // __has_builtin(__builtin_bswap64)
+#  elif defined(__GNUC__)
+#    define BSWAP32(u) __builtin_bswap32(u)
+#    define BSWAP64(u) __builtin_bswap64(u)
+#  endif // __has_builtin
+#endif // defined(_MSC_VER)
+#ifndef BSWAP32
+static inline uint32_t BSWAP32(uint32_t u)
+{
+	return (((u & 0xff000000) >> 24)
+	      | ((u & 0x00ff0000) >> 8)
+	      | ((u & 0x0000ff00) << 8)
+	      | ((u & 0x000000ff) << 24));
+}
+#endif
+#ifndef BSWAP64
+static inline uint64_t BSWAP64(uint64_t u)
+{
+	return (((u & 0xff00000000000000ULL) >> 56)
+	      | ((u & 0x00ff000000000000ULL) >> 40)
+	      | ((u & 0x0000ff0000000000ULL) >> 24)
+	      | ((u & 0x000000ff00000000ULL) >> 8)
+	      | ((u & 0x00000000ff000000ULL) << 8)
+	      | ((u & 0x0000000000ff0000ULL) << 24)
+	      | ((u & 0x000000000000ff00ULL) << 40)
+	      | ((u & 0x00000000000000ffULL) << 56));
+}
+#endif
+
+#if SIZEOF_SIZE_T >= 8
+#define BC_BSWAP(u) BSWAP64(u)
+#define BC_UINT_T uint64_t
+#else
+#define BC_BSWAP(u) BSWAP32(u)
+#define BC_UINT_T uint32_t
+#endif
+
+#ifdef WORDS_BIGENDIAN
+#define BC_LITTLE_ENDIAN 0
+#else
+#define BC_LITTLE_ENDIAN 1
+#endif
+
+
 /* routines */
 int _bc_do_compare (bc_num n1, bc_num n2, bool use_sign);
 bc_num _bc_do_add (bc_num n1, bc_num n2, size_t scale_min);
diff --git a/ext/bcmath/libbcmath/src/str2num.c b/ext/bcmath/libbcmath/src/str2num.c
index 370d899772ff8..4787e16b53a2b 100644
--- a/ext/bcmath/libbcmath/src/str2num.c
+++ b/ext/bcmath/libbcmath/src/str2num.c
@@ -31,6 +31,7 @@
 
 #include "bcmath.h"
 #include "convert.h"
+#include "private.h"
 #include <stdbool.h>
 #include <stddef.h>
 #ifdef __SSE2__