From 536d2f7cd5e86cf5aabdb63a83d1c427435bdebd Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Sun, 5 May 2024 21:13:55 +0900 Subject: [PATCH 01/10] Added some definitions to bcmath.h Moved SWAR_ONES and SWAR_REPEAT to header, and defined bytes swap etc. --- ext/bcmath/libbcmath/src/bcmath.h | 60 ++++++++++++++++++++++++++++++ ext/bcmath/libbcmath/src/convert.c | 6 --- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/ext/bcmath/libbcmath/src/bcmath.h b/ext/bcmath/libbcmath/src/bcmath.h index b8e3b4c7fb86a..c394982b3af23 100644 --- a/ext/bcmath/libbcmath/src/bcmath.h +++ b/ext/bcmath/libbcmath/src/bcmath.h @@ -78,6 +78,66 @@ typedef struct bc_struct { #define LONG_MAX 0x7ffffff #endif +/* This will be 0x01010101 for 32-bit and 0x0101010101010101 for 64-bit */ +#define SWAR_ONES (~((size_t) 0) / 0xFF) +/* This repeats a byte `x` into an entire 32/64-bit word. + * Example: SWAR_REPEAT(0xAB) will be 0xABABABAB for 32-bit and 0xABABABABABABABAB for 64-bit. */ +#define SWAR_REPEAT(x) (SWAR_ONES * (x)) + +/* Bytes swap */ +#if defined(_MSC_VER) +# include <stdlib.h> +# define BSWAP32(u) _byteswap_ulong(u) +# define BSWAP64(u) _byteswap_uint64(u) +#else +# ifdef __has_builtin +# if __has_builtin(__builtin_bswap32) +# define BSWAP32(u) __builtin_bswap32(u) +# endif // __has_builtin(__builtin_bswap32) +# if __has_builtin(__builtin_bswap64) +# define BSWAP64(u) __builtin_bswap64(u) +# endif // __has_builtin(__builtin_bswap64) +# elif defined(__GNUC__) +# define BSWAP32(u) __builtin_bswap32(u) +# define BSWAP64(u) __builtin_bswap64(u) +# endif // __has_builtin +#endif // defined(_MSC_VER) +#ifndef BSWAP32 +inline uint32_t BSWAP32(uint32_t u) +{ + return (((u & 0xff000000) >> 24) + | ((u & 0x00ff0000) >> 8) + | ((u & 0x0000ff00) << 8) + | ((u & 0x000000ff) << 24)); +} +#endif +#ifndef BSWAP64 +inline uint64_t BSWAP64(uint64_t u) +{ + return (((u & 0xff00000000000000ULL) >> 56) + | ((u & 0x00ff000000000000ULL) >> 40) + | ((u & 0x0000ff0000000000ULL) >> 24) + | ((u & 
0x000000ff00000000ULL) >> 8) + | ((u & 0x00000000ff000000ULL) << 8) + | ((u & 0x0000000000ff0000ULL) << 24) + | ((u & 0x000000000000ff00ULL) << 40) + | ((u & 0x00000000000000ffULL) << 56)); +} +#endif + +#if SIZEOF_SIZE_T >= 8 +#define BC_BSWAP(u) BSWAP64(u) +#define BC_UINT_T uint64_t +#else +#define BC_BSWAP(u) BSWAP32(u) +#define BC_UINT_T uint32_t +#endif + +#ifdef WORDS_BIGENDIAN +#define BC_LITTLE_ENDIAN 0 +#else +#define BC_LITTLE_ENDIAN 1 +#endif /* Function Prototypes */ diff --git a/ext/bcmath/libbcmath/src/convert.c b/ext/bcmath/libbcmath/src/convert.c index 953dde27bb13a..5164215a333f3 100644 --- a/ext/bcmath/libbcmath/src/convert.c +++ b/ext/bcmath/libbcmath/src/convert.c @@ -20,12 +20,6 @@ # include <emmintrin.h> #endif -/* This will be 0x01010101 for 32-bit and 0x0101010101010101 */ -#define SWAR_ONES (~((size_t) 0) / 0xFF) -/* This repeats a byte `x` into an entire 32/64-bit word. - * Example: SWAR_REPEAT(0xAB) will be 0xABABABAB for 32-bit and 0xABABABABABABABAB for 64-bit. */ -#define SWAR_REPEAT(x) (SWAR_ONES * (x)) - static char *bc_copy_and_shift_numbers(char *restrict dest, const char *source, const char *source_end, unsigned char shift, bool add) { size_t bulk_shift = SWAR_REPEAT(shift); From 670eada0f69e1a62a575c4b48088b96801c143dc Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Sun, 5 May 2024 22:34:34 +0900 Subject: [PATCH 02/10] Refactor _bc_do_sub _bc_do_sub has been modified to use SIMD to perform faster calculations. 
--- ext/bcmath/libbcmath/src/doaddsub.c | 77 +++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index 650321d7ff6f4..b3c60ac7bbfc3 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -124,36 +124,33 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) { bc_num diff; - size_t diff_scale, diff_len; - size_t min_scale, min_len; - size_t borrow, count; + + size_t diff_scale = MAX(n1->n_scale, n2->n_scale); + size_t diff_len = EXPECTED(n1->n_len >= n2->n_len) ? n1->n_len : n2->n_len; + size_t min_scale = MIN(n1->n_scale, n2->n_scale); + size_t min_len = n1->n_len >= n2->n_len ? n2->n_len : n1->n_len; + size_t min_bytes = min_len + min_scale; + size_t borrow = 0; int val; char *n1ptr, *n2ptr, *diffptr; /* Allocate temporary storage. */ - diff_len = MAX(n1->n_len, n2->n_len); - diff_scale = MAX(n1->n_scale, n2->n_scale); - min_len = MIN(n1->n_len, n2->n_len); - min_scale = MIN(n1->n_scale, n2->n_scale); - diff = bc_new_num (diff_len, MAX(diff_scale, scale_min)); + diff = bc_new_num (n1->n_len, MAX(diff_scale, scale_min)); /* Initialize the subtract. */ n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1); n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1); diffptr = (char *) (diff->n_value + diff_len + diff_scale - 1); - /* Subtract the numbers. */ - borrow = 0; - /* Take care of the longer scaled number. 
*/ if (n1->n_scale != min_scale) { /* n1 has the longer scale */ - for (count = n1->n_scale - min_scale; count > 0; count--) { + for (size_t count = n1->n_scale - min_scale; count > 0; count--) { *diffptr-- = *n1ptr--; } } else { /* n2 has the longer scale */ - for (count = n2->n_scale - min_scale; count > 0; count--) { + for (size_t count = n2->n_scale - min_scale; count > 0; count--) { val = -*n2ptr-- - borrow; if (val < 0) { val += BASE; @@ -166,7 +163,57 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) } /* Now do the equal length scale and integer parts. */ - for (count = 0; count < min_len + min_scale; count++) { + size_t sub_count = 0; + if (min_bytes >= sizeof(BC_UINT_T)) { + diffptr++; + n1ptr++; + n2ptr++; + while (sub_count + sizeof(BC_UINT_T) <= min_bytes) { + diffptr -= sizeof(BC_UINT_T); + n1ptr -= sizeof(BC_UINT_T); + n2ptr -= sizeof(BC_UINT_T); + + BC_UINT_T n1bytes; + BC_UINT_T n2bytes; + memcpy(&n1bytes, n1ptr, sizeof(n1bytes)); + memcpy(&n2bytes, n2ptr, sizeof(n2bytes)); + +#if BC_LITTLE_ENDIAN + /* Bytes swap */ + n1bytes = BC_BSWAP(n1bytes); + n2bytes = BC_BSWAP(n2bytes); +#endif + + n1bytes -= (n2bytes + borrow); + /* If the most significant 4 bits of the 8 bytes are not 0, a carry-down has occurred. */ + bool tmp_borrow = n1bytes >= ((BC_UINT_T) 0x10 << (8 * (sizeof(BC_UINT_T) - 1))); + + /* + * If any one of the upper 4 bits of each of the 8 bytes is 1, subtract 6 from that byte. + * The fact that the upper 4 bits are not 0 means that a carry-down has occurred, and when + * the hexadecimal number is carried down, there is a difference of 6 from the decimal + * calculation, so 6 is subtracted. + * Also, set all upper 4 bits to 0. 
+ */ + BC_UINT_T borrow_mask = (((n1bytes | (n1bytes >> 1) | (n1bytes >> 2) | (n1bytes >> 3)) & SWAR_REPEAT(0x10)) * 0x06) >> 4; + n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask; + +#if BC_LITTLE_ENDIAN + /* Bytes swap */ + n1bytes = BC_BSWAP(n1bytes); +#endif + + memcpy(diffptr, &n1bytes, sizeof(n1bytes)); + + borrow = tmp_borrow; + sub_count += sizeof(BC_UINT_T); + } + diffptr--; + n1ptr--; + n2ptr--; + } + + for (; sub_count < min_bytes; sub_count++) { val = *n1ptr-- - *n2ptr-- - borrow; if (val < 0) { val += BASE; @@ -179,7 +226,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) /* If n1 has more digits than n2, we now do that subtract. */ if (diff_len != min_len) { - for (count = diff_len - min_len; count > 0; count--) { + for (size_t count = diff_len - min_len; count > 0; count--) { val = *n1ptr-- - borrow; if (val < 0) { val += BASE; From 9a715bbf5e207953685353c3236df93f327ebe7f Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 6 May 2024 07:20:36 +0900 Subject: [PATCH 03/10] restore it once --- ext/bcmath/libbcmath/src/doaddsub.c | 77 ++++++----------------------- 1 file changed, 15 insertions(+), 62 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index b3c60ac7bbfc3..650321d7ff6f4 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -124,33 +124,36 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) { bc_num diff; - - size_t diff_scale = MAX(n1->n_scale, n2->n_scale); - size_t diff_len = EXPECTED(n1->n_len >= n2->n_len) ? n1->n_len : n2->n_len; - size_t min_scale = MIN(n1->n_scale, n2->n_scale); - size_t min_len = n1->n_len >= n2->n_len ? 
n2->n_len : n1->n_len; - size_t min_bytes = min_len + min_scale; - size_t borrow = 0; + size_t diff_scale, diff_len; + size_t min_scale, min_len; + size_t borrow, count; int val; char *n1ptr, *n2ptr, *diffptr; /* Allocate temporary storage. */ - diff = bc_new_num (n1->n_len, MAX(diff_scale, scale_min)); + diff_len = MAX(n1->n_len, n2->n_len); + diff_scale = MAX(n1->n_scale, n2->n_scale); + min_len = MIN(n1->n_len, n2->n_len); + min_scale = MIN(n1->n_scale, n2->n_scale); + diff = bc_new_num (diff_len, MAX(diff_scale, scale_min)); /* Initialize the subtract. */ n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1); n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1); diffptr = (char *) (diff->n_value + diff_len + diff_scale - 1); + /* Subtract the numbers. */ + borrow = 0; + /* Take care of the longer scaled number. */ if (n1->n_scale != min_scale) { /* n1 has the longer scale */ - for (size_t count = n1->n_scale - min_scale; count > 0; count--) { + for (count = n1->n_scale - min_scale; count > 0; count--) { *diffptr-- = *n1ptr--; } } else { /* n2 has the longer scale */ - for (size_t count = n2->n_scale - min_scale; count > 0; count--) { + for (count = n2->n_scale - min_scale; count > 0; count--) { val = -*n2ptr-- - borrow; if (val < 0) { val += BASE; @@ -163,57 +166,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) } /* Now do the equal length scale and integer parts. 
*/ - size_t sub_count = 0; - if (min_bytes >= sizeof(BC_UINT_T)) { - diffptr++; - n1ptr++; - n2ptr++; - while (sub_count + sizeof(BC_UINT_T) <= min_bytes) { - diffptr -= sizeof(BC_UINT_T); - n1ptr -= sizeof(BC_UINT_T); - n2ptr -= sizeof(BC_UINT_T); - - BC_UINT_T n1bytes; - BC_UINT_T n2bytes; - memcpy(&n1bytes, n1ptr, sizeof(n1bytes)); - memcpy(&n2bytes, n2ptr, sizeof(n2bytes)); - -#if BC_LITTLE_ENDIAN - /* Bytes swap */ - n1bytes = BC_BSWAP(n1bytes); - n2bytes = BC_BSWAP(n2bytes); -#endif - - n1bytes -= (n2bytes + borrow); - /* If the most significant 4 bits of the 8 bytes are not 0, a carry-down has occurred. */ - bool tmp_borrow = n1bytes >= ((BC_UINT_T) 0x10 << (8 * (sizeof(BC_UINT_T) - 1))); - - /* - * If any one of the upper 4 bits of each of the 8 bytes is 1, subtract 6 from that byte. - * The fact that the upper 4 bits are not 0 means that a carry-down has occurred, and when - * the hexadecimal number is carried down, there is a difference of 6 from the decimal - * calculation, so 6 is subtracted. - * Also, set all upper 4 bits to 0. - */ - BC_UINT_T borrow_mask = (((n1bytes | (n1bytes >> 1) | (n1bytes >> 2) | (n1bytes >> 3)) & SWAR_REPEAT(0x10)) * 0x06) >> 4; - n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask; - -#if BC_LITTLE_ENDIAN - /* Bytes swap */ - n1bytes = BC_BSWAP(n1bytes); -#endif - - memcpy(diffptr, &n1bytes, sizeof(n1bytes)); - - borrow = tmp_borrow; - sub_count += sizeof(BC_UINT_T); - } - diffptr--; - n1ptr--; - n2ptr--; - } - - for (; sub_count < min_bytes; sub_count++) { + for (count = 0; count < min_len + min_scale; count++) { val = *n1ptr-- - *n2ptr-- - borrow; if (val < 0) { val += BASE; @@ -226,7 +179,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) /* If n1 has more digits than n2, we now do that subtract. 
*/ if (diff_len != min_len) { - for (size_t count = diff_len - min_len; count > 0; count--) { + for (count = diff_len - min_len; count > 0; count--) { val = *n1ptr-- - borrow; if (val < 0) { val += BASE; From 63f69688c0f7dc2be23926140019a83cdcdeca01 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 6 May 2024 08:19:18 +0900 Subject: [PATCH 04/10] use SIMD --- ext/bcmath/libbcmath/src/doaddsub.c | 52 ++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index 650321d7ff6f4..f628c7e363d2d 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -166,7 +166,57 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) } /* Now do the equal length scale and integer parts. */ - for (count = 0; count < min_len + min_scale; count++) { + count = 0; + if (min_len + min_scale >= sizeof(BC_UINT_T)) { + diffptr++; + n1ptr++; + n2ptr++; + while (count + sizeof(BC_UINT_T) <= min_len + min_scale) { + diffptr -= sizeof(BC_UINT_T); + n1ptr -= sizeof(BC_UINT_T); + n2ptr -= sizeof(BC_UINT_T); + + BC_UINT_T n1bytes; + BC_UINT_T n2bytes; + memcpy(&n1bytes, n1ptr, sizeof(n1bytes)); + memcpy(&n2bytes, n2ptr, sizeof(n2bytes)); + +#if BC_LITTLE_ENDIAN + /* Bytes swap */ + n1bytes = BC_BSWAP(n1bytes); + n2bytes = BC_BSWAP(n2bytes); +#endif + + n1bytes -= n2bytes + borrow; + /* If the most significant 4 bits of the 8 bytes are not 0, a carry-down has occurred. */ + bool tmp_borrow = n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1)); + + /* + * If any one of the upper 4 bits of each of the 8 bytes is 1, subtract 6 from that byte. + * The fact that the upper 4 bits are not 0 means that a carry-down has occurred, and when + * the hexadecimal number is carried down, there is a difference of 6 from the decimal + * calculation, so 6 is subtracted. + * Also, set all upper 4 bits to 0. 
+ */ + BC_UINT_T borrow_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0x06; + n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask; + +#if BC_LITTLE_ENDIAN + /* Bytes swap */ + n1bytes = BC_BSWAP(n1bytes); +#endif + + memcpy(diffptr, &n1bytes, sizeof(n1bytes)); + + borrow = tmp_borrow; + count += sizeof(BC_UINT_T); + } + diffptr--; + n1ptr--; + n2ptr--; + } + + for (; count < min_len + min_scale; count++) { val = *n1ptr-- - *n2ptr-- - borrow; if (val < 0) { val += BASE; From 1dede5a69bec79f0bd0020bc4768954ae77fa6c7 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 6 May 2024 09:18:03 +0900 Subject: [PATCH 05/10] refactor initialization --- ext/bcmath/libbcmath/src/doaddsub.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index f628c7e363d2d..fdc96b95e3e04 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -124,17 +124,16 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) { bc_num diff; - size_t diff_scale, diff_len; - size_t min_scale, min_len; - size_t borrow, count; + size_t diff_len = MAX(n1->n_len, n2->n_len); + size_t diff_scale = MAX(n1->n_scale, n2->n_scale); + size_t min_len = MIN(n1->n_len, n2->n_len); + size_t min_scale = MIN(n1->n_scale, n2->n_scale); + size_t borrow = 0; + size_t count; int val; char *n1ptr, *n2ptr, *diffptr; /* Allocate temporary storage. */ - diff_len = MAX(n1->n_len, n2->n_len); - diff_scale = MAX(n1->n_scale, n2->n_scale); - min_len = MIN(n1->n_len, n2->n_len); - min_scale = MIN(n1->n_scale, n2->n_scale); diff = bc_new_num (diff_len, MAX(diff_scale, scale_min)); /* Initialize the subtract. 
*/ @@ -142,9 +141,6 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1); diffptr = (char *) (diff->n_value + diff_len + diff_scale - 1); - /* Subtract the numbers. */ - borrow = 0; - /* Take care of the longer scaled number. */ if (n1->n_scale != min_scale) { /* n1 has the longer scale */ From 6a29fb72e7ab9ea6ab9a4949c1289ea7be526ca8 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 6 May 2024 09:23:30 +0900 Subject: [PATCH 06/10] use EXPECTED --- ext/bcmath/libbcmath/src/doaddsub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index fdc96b95e3e04..82dbfed490bc5 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -124,9 +124,9 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) { bc_num diff; - size_t diff_len = MAX(n1->n_len, n2->n_len); + size_t diff_len = EXPECTED(n1->n_len >= n2->n_len) ? n1->n_len : n2->n_len; size_t diff_scale = MAX(n1->n_scale, n2->n_scale); - size_t min_len = MIN(n1->n_len, n2->n_len); + size_t min_len = n1->n_len >= n2->n_len ? 
n2->n_len : n1->n_len; size_t min_scale = MIN(n1->n_scale, n2->n_scale); size_t borrow = 0; size_t count; From ce78b1449656cf17a997669daa7e08617e070c9c Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 6 May 2024 09:33:08 +0900 Subject: [PATCH 07/10] Calculate `min_len + min_scale` first --- ext/bcmath/libbcmath/src/doaddsub.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index 82dbfed490bc5..0fec8b6a1912f 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -128,6 +128,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) size_t diff_scale = MAX(n1->n_scale, n2->n_scale); size_t min_len = n1->n_len >= n2->n_len ? n2->n_len : n1->n_len; size_t min_scale = MIN(n1->n_scale, n2->n_scale); + size_t min_bytes = min_len + min_scale; size_t borrow = 0; size_t count; int val; @@ -163,11 +164,11 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) /* Now do the equal length scale and integer parts. 
*/ count = 0; - if (min_len + min_scale >= sizeof(BC_UINT_T)) { + if (min_bytes >= sizeof(BC_UINT_T)) { diffptr++; n1ptr++; n2ptr++; - while (count + sizeof(BC_UINT_T) <= min_len + min_scale) { + while (count + sizeof(BC_UINT_T) <= min_bytes) { diffptr -= sizeof(BC_UINT_T); n1ptr -= sizeof(BC_UINT_T); n2ptr -= sizeof(BC_UINT_T); @@ -212,7 +213,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) n2ptr--; } - for (; count < min_len + min_scale; count++) { + for (; count < min_bytes; count++) { val = *n1ptr-- - *n2ptr-- - borrow; if (val < 0) { val += BASE; From 2b6cbadf120b728c5ec1554968fbeb076cfbd491 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 6 May 2024 12:52:51 +0900 Subject: [PATCH 08/10] Moved some constants and macros to private.h --- ext/bcmath/libbcmath/src/bcmath.h | 60 ----------------------------- ext/bcmath/libbcmath/src/convert.c | 1 + ext/bcmath/libbcmath/src/private.h | 62 ++++++++++++++++++++++++++++++ ext/bcmath/libbcmath/src/str2num.c | 1 + 4 files changed, 64 insertions(+), 60 deletions(-) diff --git a/ext/bcmath/libbcmath/src/bcmath.h b/ext/bcmath/libbcmath/src/bcmath.h index c394982b3af23..b8e3b4c7fb86a 100644 --- a/ext/bcmath/libbcmath/src/bcmath.h +++ b/ext/bcmath/libbcmath/src/bcmath.h @@ -78,66 +78,6 @@ typedef struct bc_struct { #define LONG_MAX 0x7ffffff #endif -/* This will be 0x01010101 for 32-bit and 0x0101010101010101 for 64-bit */ -#define SWAR_ONES (~((size_t) 0) / 0xFF) -/* This repeats a byte `x` into an entire 32/64-bit word. - * Example: SWAR_REPEAT(0xAB) will be 0xABABABAB for 32-bit and 0xABABABABABABABAB for 64-bit. 
*/ -#define SWAR_REPEAT(x) (SWAR_ONES * (x)) - -/* Bytes swap */ -#if defined(_MSC_VER) -# include <stdlib.h> -# define BSWAP32(u) _byteswap_ulong(u) -# define BSWAP64(u) _byteswap_uint64(u) -#else -# ifdef __has_builtin -# if __has_builtin(__builtin_bswap32) -# define BSWAP32(u) __builtin_bswap32(u) -# endif // __has_builtin(__builtin_bswap32) -# if __has_builtin(__builtin_bswap64) -# define BSWAP64(u) __builtin_bswap64(u) -# endif // __has_builtin(__builtin_bswap64) -# elif defined(__GNUC__) -# define BSWAP32(u) __builtin_bswap32(u) -# define BSWAP64(u) __builtin_bswap64(u) -# endif // __has_builtin -#endif // defined(_MSC_VER) -#ifndef BSWAP32 -inline uint32_t BSWAP32(uint32_t u) -{ - return (((u & 0xff000000) >> 24) - | ((u & 0x00ff0000) >> 8) - | ((u & 0x0000ff00) << 8) - | ((u & 0x000000ff) << 24)); -} -#endif -#ifndef BSWAP64 -inline uint64_t BSWAP64(uint64_t u) -{ - return (((u & 0xff00000000000000ULL) >> 56) - | ((u & 0x00ff000000000000ULL) >> 40) - | ((u & 0x0000ff0000000000ULL) >> 24) - | ((u & 0x000000ff00000000ULL) >> 8) - | ((u & 0x00000000ff000000ULL) << 8) - | ((u & 0x0000000000ff0000ULL) << 24) - | ((u & 0x000000000000ff00ULL) << 40) - | ((u & 0x00000000000000ffULL) << 56)); -} -#endif - -#if SIZEOF_SIZE_T >= 8 -#define BC_BSWAP(u) BSWAP64(u) -#define BC_UINT_T uint64_t -#else -#define BC_BSWAP(u) BSWAP32(u) -#define BC_UINT_T uint32_t -#endif - -#ifdef WORDS_BIGENDIAN -#define BC_LITTLE_ENDIAN 0 -#else -#define BC_LITTLE_ENDIAN 1 -#endif /* Function Prototypes */ diff --git a/ext/bcmath/libbcmath/src/convert.c b/ext/bcmath/libbcmath/src/convert.c index 5164215a333f3..01617c0662c64 100644 --- a/ext/bcmath/libbcmath/src/convert.c +++ b/ext/bcmath/libbcmath/src/convert.c @@ -16,6 +16,7 @@ #include "bcmath.h" #include "convert.h" +#include "private.h" #ifdef __SSE2__ # include <emmintrin.h> #endif diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h index 481a651128320..f21bef665f954 100644 --- a/ext/bcmath/libbcmath/src/private.h +++ 
b/ext/bcmath/libbcmath/src/private.h @@ -34,6 +34,68 @@ #include <stdbool.h> #include <stddef.h> +/* This will be 0x01010101 for 32-bit and 0x0101010101010101 for 64-bit */ +#define SWAR_ONES (~((size_t) 0) / 0xFF) +/* This repeats a byte `x` into an entire 32/64-bit word. + * Example: SWAR_REPEAT(0xAB) will be 0xABABABAB for 32-bit and 0xABABABABABABABAB for 64-bit. */ +#define SWAR_REPEAT(x) (SWAR_ONES * (x)) + +/* Bytes swap */ +#if defined(_MSC_VER) +# include <stdlib.h> +# define BSWAP32(u) _byteswap_ulong(u) +# define BSWAP64(u) _byteswap_uint64(u) +#else +# ifdef __has_builtin +# if __has_builtin(__builtin_bswap32) +# define BSWAP32(u) __builtin_bswap32(u) +# endif // __has_builtin(__builtin_bswap32) +# if __has_builtin(__builtin_bswap64) +# define BSWAP64(u) __builtin_bswap64(u) +# endif // __has_builtin(__builtin_bswap64) +# elif defined(__GNUC__) +# define BSWAP32(u) __builtin_bswap32(u) +# define BSWAP64(u) __builtin_bswap64(u) +# endif // __has_builtin +#endif // defined(_MSC_VER) +#ifndef BSWAP32 +inline uint32_t BSWAP32(uint32_t u) +{ + return (((u & 0xff000000) >> 24) + | ((u & 0x00ff0000) >> 8) + | ((u & 0x0000ff00) << 8) + | ((u & 0x000000ff) << 24)); +} +#endif +#ifndef BSWAP64 +inline uint64_t BSWAP64(uint64_t u) +{ + return (((u & 0xff00000000000000ULL) >> 56) + | ((u & 0x00ff000000000000ULL) >> 40) + | ((u & 0x0000ff0000000000ULL) >> 24) + | ((u & 0x000000ff00000000ULL) >> 8) + | ((u & 0x00000000ff000000ULL) << 8) + | ((u & 0x0000000000ff0000ULL) << 24) + | ((u & 0x000000000000ff00ULL) << 40) + | ((u & 0x00000000000000ffULL) << 56)); +} +#endif + +#if SIZEOF_SIZE_T >= 8 +#define BC_BSWAP(u) BSWAP64(u) +#define BC_UINT_T uint64_t +#else +#define BC_BSWAP(u) BSWAP32(u) +#define BC_UINT_T uint32_t +#endif + +#ifdef WORDS_BIGENDIAN +#define BC_LITTLE_ENDIAN 0 +#else +#define BC_LITTLE_ENDIAN 1 +#endif + + /* routines */ int _bc_do_compare (bc_num n1, bc_num n2, bool use_sign); bc_num _bc_do_add (bc_num n1, bc_num n2, size_t scale_min); diff --git 
a/ext/bcmath/libbcmath/src/str2num.c b/ext/bcmath/libbcmath/src/str2num.c index 370d899772ff8..4787e16b53a2b 100644 --- a/ext/bcmath/libbcmath/src/str2num.c +++ b/ext/bcmath/libbcmath/src/str2num.c @@ -31,6 +31,7 @@ #include "bcmath.h" #include "convert.h" +#include "private.h" #include <stdbool.h> #include <stddef.h> #ifdef __SSE2__ From 318601cc514b2c35e5a8778b6731a4f86ca21e97 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 6 May 2024 20:04:26 +0900 Subject: [PATCH 09/10] Added and fixed comments --- ext/bcmath/libbcmath/src/doaddsub.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index 0fec8b6a1912f..2952805f52b1c 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -124,8 +124,10 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) { bc_num diff; + /* The caller guarantees that n1 is always the larger number. */ size_t diff_len = EXPECTED(n1->n_len >= n2->n_len) ? n1->n_len : n2->n_len; size_t diff_scale = MAX(n1->n_scale, n2->n_scale); + /* Same condition as EXPECTED before, but using EXPECTED again will make it slower. */ size_t min_len = n1->n_len >= n2->n_len ? n2->n_len : n1->n_len; size_t min_scale = MIN(n1->n_scale, n2->n_scale); size_t min_bytes = min_len + min_scale; @@ -164,6 +166,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) /* Now do the equal length scale and integer parts. */ count = 0; + /* Uses SIMD to perform calculations at high speed. */ if (min_bytes >= sizeof(BC_UINT_T)) { diffptr++; n1ptr++; @@ -179,27 +182,27 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) memcpy(&n2bytes, n2ptr, sizeof(n2bytes)); #if BC_LITTLE_ENDIAN - /* Bytes swap */ + /* Little endian requires changing the order of bytes. 
*/ n1bytes = BC_BSWAP(n1bytes); n2bytes = BC_BSWAP(n2bytes); #endif n1bytes -= n2bytes + borrow; - /* If the most significant 4 bits of the 8 bytes are not 0, a carry-down has occurred. */ + /* If the most significant bit is 1, a carry down has occurred. */ bool tmp_borrow = n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1)); /* - * If any one of the upper 4 bits of each of the 8 bytes is 1, subtract 6 from that byte. - * The fact that the upper 4 bits are not 0 means that a carry-down has occurred, and when - * the hexadecimal number is carried down, there is a difference of 6 from the decimal - * calculation, so 6 is subtracted. - * Also, set all upper 4 bits to 0. + * Check the most significant bit of each of the 16 bytes, and if it is 1, a carry down has + * occurred. When carrying down occurs, due to the difference between decimal and hexadecimal + * numbers, an extra 6 is added to the lower 4 bits. + * Therefore, for a byte that has been carried down, set all the upper 4 bits to 0 and subtract + * 6 from the lower 4 bits to adjust it to the correct value as a decimal number. */ BC_UINT_T borrow_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0x06; n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask; #if BC_LITTLE_ENDIAN - /* Bytes swap */ + /* Little endian requires changing the order of bytes back. */ n1bytes = BC_BSWAP(n1bytes); #endif @@ -213,6 +216,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) n2ptr--; } + /* Calculate the remaining bytes that are less than the size of BC_UINT_T using a normal loop. 
*/ for (; count < min_bytes; count++) { val = *n1ptr-- - *n2ptr-- - borrow; if (val < 0) { From 86a0084f81ba72d90af03b8d5ed240c68fe867ac Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 6 May 2024 20:06:21 +0900 Subject: [PATCH 10/10] fixed comment --- ext/bcmath/libbcmath/src/doaddsub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index 2952805f52b1c..eb2f7b6645d7a 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -192,7 +192,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) bool tmp_borrow = n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1)); /* - * Check the most significant bit of each of the 16 bytes, and if it is 1, a carry down has + * Check the most significant bit of each of the bytes, and if it is 1, a carry down has * occurred. When carrying down occurs, due to the difference between decimal and hexadecimal * numbers, an extra 6 is added to the lower 4 bits. * Therefore, for a byte that has been carried down, set all the upper 4 bits to 0 and subtract