From d58fbe574cfffbe7a50a375598589218070abba7 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Fri, 10 May 2024 21:30:22 +0900 Subject: [PATCH 1/5] The basic structure of the code was changed to the same as sub --- ext/bcmath/libbcmath/src/doaddsub.c | 82 +++++++++++++---------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index eb2f7b6645d7a..ff24cc8a6bea0 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -41,75 +41,67 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) { bc_num sum; - size_t sum_scale, sum_digits; + size_t sum_len = MAX(n1->n_len, n2->n_len) + 1; + size_t sum_scale = MAX(n1->n_scale, n2->n_scale); + size_t min_len = MIN (n1->n_len, n2->n_len); + size_t min_scale = MIN(n1->n_scale, n2->n_scale); + size_t min_bytes = min_len + min_scale; char *n1ptr, *n2ptr, *sumptr; - size_t n1bytes, n2bytes; - bool carry; + bool carry = 0; + size_t count; /* Prepare sum. */ - sum_scale = MAX (n1->n_scale, n2->n_scale); - sum_digits = MAX (n1->n_len, n2->n_len) + 1; - sum = bc_new_num (sum_digits, MAX(sum_scale, scale_min)); + sum = bc_new_num (sum_len, MAX(sum_scale, scale_min)); /* Start with the fraction part. Initialize the pointers. */ - n1bytes = n1->n_scale; - n2bytes = n2->n_scale; - n1ptr = (char *) (n1->n_value + n1->n_len + n1bytes - 1); - n2ptr = (char *) (n2->n_value + n2->n_len + n2bytes - 1); - sumptr = (char *) (sum->n_value + sum_scale + sum_digits - 1); + n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1); + n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1); + sumptr = (char *) (sum->n_value + sum_scale + sum_len - 1); /* Add the fraction part. First copy the longer fraction.*/ - if (n1bytes != n2bytes) { - if (n1bytes > n2bytes) { - while (n1bytes > n2bytes) { - *sumptr-- = *n1ptr--; - n1bytes--; - } - } else { - while (n2bytes > n1bytes) { - *sumptr-- = *n2ptr--; - n2bytes--; - } + if (n1->n_scale != min_scale) { + /* n1 has the longer scale */ + for (count = n1->n_scale - min_scale; count > 0; count--) { + *sumptr-- = *n1ptr--; + } + } else { + /* n2 has the longer scale */ + for (count = n2->n_scale - min_scale; count > 0; count--) { + *sumptr-- = *n2ptr--; } } /* Now add the remaining fraction part and equal size integer parts. */ - n1bytes += n1->n_len; - n2bytes += n2->n_len; - carry = 0; - while ((n1bytes > 0) && (n2bytes > 0)) { + for (count = 0; count < min_bytes; count++) { *sumptr = *n1ptr-- + *n2ptr-- + carry; - if (*sumptr > (BASE - 1)) { - carry = 1; + if (*sumptr >= BASE) { *sumptr -= BASE; + carry = 1; } else { carry = 0; } sumptr--; - n1bytes--; - n2bytes--; } /* Now add carry the longer integer part. */ - if (n1bytes == 0) { - n1bytes = n2bytes; - n1ptr = n2ptr; - } - while (n1bytes-- > 0) { - *sumptr = *n1ptr-- + carry; - if (*sumptr > (BASE - 1)) { - carry = true; - *sumptr -= BASE; - } else { - carry = false; + if (sum_len != min_len) { + if (n2->n_len > n1->n_len) { + n1ptr = n2ptr; + } + for (count = sum_len - min_len; count > 0; count--) { + *sumptr = *n1ptr-- + carry; + if (*sumptr >= BASE) { + *sumptr -= BASE; + carry = 1; + } else { + carry = 0; + } + sumptr--; } - sumptr--; } /* Set final carry. */ - if (carry) { - *sumptr += 1; - } + *sumptr += carry; /* Adjust sum and return. */ _bc_rm_leading_zeros(sum); From 09352132100a3a2e1120af3631932d6ab538df91 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Fri, 10 May 2024 23:02:49 +0900 Subject: [PATCH 2/5] use SIMD --- ext/bcmath/libbcmath/src/doaddsub.c | 52 ++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index ff24cc8a6bea0..f68de54ecd05b 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -72,7 +72,57 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) } /* Now add the remaining fraction part and equal size integer parts. */ - for (count = 0; count < min_bytes; count++) { + count = 0; + /* Uses SIMD to perform calculations at high speed. */ + if (min_bytes >= sizeof(BC_UINT_T)) { + sumptr++; + n1ptr++; + n2ptr++; + while (count + sizeof(BC_UINT_T) <= min_bytes) { + sumptr -= sizeof(BC_UINT_T); + n1ptr -= sizeof(BC_UINT_T); + n2ptr -= sizeof(BC_UINT_T); + + BC_UINT_T n1bytes; + BC_UINT_T n2bytes; + memcpy(&n1bytes, n1ptr, sizeof(n1bytes)); + memcpy(&n2bytes, n2ptr, sizeof(n2bytes)); + +#if BC_LITTLE_ENDIAN + /* Little endian requires changing the order of bytes. */ + n1bytes = BC_BSWAP(n1bytes); + n2bytes = BC_BSWAP(n2bytes); +#endif + + n1bytes += SWAR_REPEAT(0xF6) + n2bytes + carry; + /* If the most significant bit is 1, a carry down has occurred. */ + carry = !(n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1))); + + /* + * Check the most significant bit of each of the bytes, and if it is 1, a carry down has + * occurred. When carrying down occurs, due to the difference between decimal and hexadecimal + * numbers, an extra 6 is added to the lower 4 bits. + * Therefore, for a byte that has been carried down, set all the upper 4 bits to 0 and subtract + * 6 from the lower 4 bits to adjust it to the correct value as a decimal number. + */ + BC_UINT_T sum_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0xF6; + n1bytes -= sum_mask; + +#if BC_LITTLE_ENDIAN + /* Little endian requires changing the order of bytes back. */ + n1bytes = BC_BSWAP(n1bytes); +#endif + + memcpy(sumptr, &n1bytes, sizeof(n1bytes)); + + count += sizeof(BC_UINT_T); + } + sumptr--; + n1ptr--; + n2ptr--; + } + + for (; count < min_bytes; count++) { *sumptr = *n1ptr-- + *n2ptr-- + carry; if (*sumptr >= BASE) { *sumptr -= BASE; From aa80c0d2224f4263ced7706748a191afbee530bf Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Fri, 10 May 2024 23:22:01 +0900 Subject: [PATCH 3/5] fixed comments --- ext/bcmath/libbcmath/src/doaddsub.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index f68de54ecd05b..70897a40d67c7 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -94,16 +94,22 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) n2bytes = BC_BSWAP(n2bytes); #endif + /* + * In order to add 1 to the "next digit" when a carry occurs, adjust it so that it + * overflows when add 10. + * e.g. + * 00001001(9) + 00000001(1) = 00001010(10) to + * 11111111 + 00000001 = 00000000(0) and carry 1 + */ n1bytes += SWAR_REPEAT(0xF6) + n2bytes + carry; - /* If the most significant bit is 1, a carry down has occurred. */ + /* If the most significant bit is 0, a carry has occurred. */ carry = !(n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1))); /* - * Check the most significant bit of each of the bytes, and if it is 1, a carry down has - * occurred. When carrying down occurs, due to the difference between decimal and hexadecimal - * numbers, an extra 6 is added to the lower 4 bits. - * Therefore, for a byte that has been carried down, set all the upper 4 bits to 0 and subtract - * 6 from the lower 4 bits to adjust it to the correct value as a decimal number. + * The calculation result is a mixture of bytes that have been carried and bytes that have not. + * The most significant bit of each byte is 0 if it is carried forward, and 1 if it is not. + * Using this, subtract the 0xF6 added for adjustment from the byte that has not been carried + * over to return it to the correct value as a decimal number. */ BC_UINT_T sum_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0xF6; n1bytes -= sum_mask; From 64eb41a99802823a4d6be75a2bd657dbc199b8f3 Mon Sep 17 00:00:00 2001 From: Saki Takamachi <34942839+SakiTakamachi@users.noreply.github.com> Date: Sat, 11 May 2024 07:53:30 +0900 Subject: [PATCH 4/5] fix compare length --- ext/bcmath/libbcmath/src/doaddsub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index 70897a40d67c7..3b27ea8b1ef9f 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -140,11 +140,11 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) } /* Now add carry the longer integer part. */ - if (sum_len != min_len) { + if (sum_len - 1 != min_len) { if (n2->n_len > n1->n_len) { n1ptr = n2ptr; } - for (count = sum_len - min_len; count > 0; count--) { + for (count = sum_len - 1 - min_len; count > 0; count--) { *sumptr = *n1ptr-- + carry; if (*sumptr >= BASE) { *sumptr -= BASE; From e07dd7c2cbcce7e180324f237cc1779811f9b789 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Sat, 11 May 2024 08:42:32 +0900 Subject: [PATCH 5/5] optimize --- ext/bcmath/libbcmath/src/doaddsub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c index 3b27ea8b1ef9f..e6bedd57ea08e 100644 --- a/ext/bcmath/libbcmath/src/doaddsub.c +++ b/ext/bcmath/libbcmath/src/doaddsub.c @@ -140,11 +140,11 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) } /* Now add carry the longer integer part. */ - if (sum_len - 1 != min_len) { + if (n1->n_len != n2->n_len) { if (n2->n_len > n1->n_len) { n1ptr = n2ptr; } - for (count = sum_len - 1 - min_len; count > 0; count--) { + for (count = sum_len - min_len; count > 1; count--) { *sumptr = *n1ptr-- + carry; if (*sumptr >= BASE) { *sumptr -= BASE;