From 91b93433bc8e20f655bd307bfe0664876d9672a3 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 13 May 2024 17:40:23 +0900 Subject: [PATCH 01/15] Changed the bcmul calculation method Multiplication is performed after converting to unsigned long, resulting in faster calculations. --- ext/bcmath/libbcmath/src/private.h | 4 + ext/bcmath/libbcmath/src/recmul.c | 313 +++++++++++------------------ 2 files changed, 120 insertions(+), 197 deletions(-) diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h index 1403baad0c257..fa538150ef73a 100644 --- a/ext/bcmath/libbcmath/src/private.h +++ b/ext/bcmath/libbcmath/src/private.h @@ -84,9 +84,13 @@ static inline uint64_t BC_BSWAP64(uint64_t u) #if SIZEOF_SIZE_T >= 8 # define BC_BSWAP(u) BC_BSWAP64(u) # define BC_UINT_T uint64_t +# define BC_LONGABLE_DIGITS 8 +# define BC_LONGABLE_OVERFLOW 100000000 #else # define BC_BSWAP(u) BC_BSWAP32(u) # define BC_UINT_T uint32_t +# define BC_LONGABLE_DIGITS 4 +# define BC_LONGABLE_OVERFLOW 10000 #endif #ifdef WORDS_BIGENDIAN diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 3b3b696f99d46..75ace8a88355a 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -36,217 +36,134 @@ #include "private.h" /* For _bc_rm_leading_zeros() */ #include "zend_alloc.h" -/* Recursive vs non-recursive multiply crossover ranges. */ -#if defined(MULDIGITS) -#include "muldigits.h" -#else -#define MUL_BASE_DIGITS 80 -#endif - -int mul_base_digits = MUL_BASE_DIGITS; -#define MUL_SMALL_DIGITS mul_base_digits/4 /* Multiply utility routines */ -static bc_num new_sub_num(size_t length, size_t scale, char *value) +/* + * Converts BCD to long, going backwards from pointer n by the number of + * characters specified by len. + */ +static inline unsigned long bc_partial_convert_to_long(const char *n, size_t len) { - bc_num temp = (bc_num) emalloc(sizeof(bc_struct)); + unsigned long num = 0; + unsigned long base = 1; - temp->n_sign = PLUS; - temp->n_len = length; - temp->n_scale = scale; - temp->n_refs = 1; - temp->n_value = value; - return temp; + for (size_t i = 0; i < len; i++) { + num += *n * base; + base *= BASE; + n--; + } + + return num; } -static void _bc_simp_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) +/* + * If the n_values ​​of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less, + * the calculation will be performed at high speed without using an array. + */ +static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) { - char *n1ptr, *n2ptr, *pvptr; - char *n1end, *n2end; /* To the end of n1 and n2. */ - int sum = 0; + char *n1end = n1->n_value + n1len - 1; + char *n2end = n2->n_value + n2len - 1; - int prodlen = n1len + n2len + 1; + unsigned long n1_l = bc_partial_convert_to_long(n1end, n1len); + unsigned long n2_l = bc_partial_convert_to_long(n2end, n2len); + unsigned long prod_l = n1_l * n2_l; + size_t prodlen = n1len + n2len; *prod = bc_new_num_nonzeroed(prodlen, 0); + char *pptr = (*prod)->n_value; + char *pend = pptr + prodlen - 1; - n1end = (char *) (n1->n_value + n1len - 1); - n2end = (char *) (n2->n_value + n2len - 1); - pvptr = (char *) ((*prod)->n_value + prodlen - 1); - - /* Here is the loop... */ - for (int index = 0; index < prodlen - 1; index++) { - n1ptr = (char *) (n1end - MAX(0, index - n2len + 1)); - n2ptr = (char *) (n2end - MIN(index, n2len - 1)); - while ((n1ptr >= n1->n_value) && (n2ptr <= n2end)) { - sum += *n1ptr * *n2ptr; - n1ptr--; - n2ptr++; - } - *pvptr-- = sum % BASE; - sum = sum / BASE; + while (pend >= pptr) { + *pend-- = prod_l % BASE; + prod_l /= BASE; } - *pvptr = sum; } - -/* A special adder/subtractor for the recursive divide and conquer - multiply algorithm. Note: if sub is called, accum must - be larger that what is being subtracted. Also, accum and val - must have n_scale = 0. (e.g. they must look like integers. *) */ -static void _bc_shift_addsub(bc_num accum, bc_num val, int shift, bool sub) +/* + * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of unsigned longs. + * The array is generated starting with the smaller digits. + * e.g. 12345678901234567890 => {34567890, 56789012, 1234} + * + * Multiply and add these groups of numbers to perform multiplication fast. + * How much to shift the digits when adding values ​​can be calculated from the index of the array. + */ +static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) { - signed char *accp, *valp; - unsigned int carry = 0; - size_t count = val->n_len; - - if (val->n_value[0] == 0) { - count--; + size_t i; + char *n1end = n1->n_value + n1len - 1; + char *n2end = n2->n_value + n2len - 1; + size_t prodlen = n1len + n2len; + + size_t n1_arr_size = n1len / BC_LONGABLE_DIGITS + (n1len % BC_LONGABLE_DIGITS ? 1 : 0); + size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); + size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; + + unsigned long n1_l[n1_arr_size]; + unsigned long n2_l[n2_arr_size]; + unsigned long prod_l[prod_arr_size]; + for (i = 0; i < prod_arr_size; i++) { + prod_l[i] = 0; } - assert(accum->n_len + accum->n_scale >= shift + count); - - /* Set up pointers and others */ - accp = (signed char *) (accum->n_value + accum->n_len + accum->n_scale - shift - 1); - valp = (signed char *) (val->n_value + val->n_len - 1); - - if (sub) { - /* Subtraction, carry is really borrow. */ - while (count--) { - *accp -= *valp-- + carry; - if (*accp < 0) { - carry = 1; - *accp-- += BASE; - } else { - carry = 0; - accp--; - } - } - while (carry) { - *accp -= carry; - if (*accp < 0) { - *accp-- += BASE; - } else { - carry = 0; - } - } - } else { - /* Addition */ - while (count--) { - *accp += *valp-- + carry; - if (*accp > (BASE - 1)) { - carry = 1; - *accp-- -= BASE; - } else { - carry = 0; - accp--; - } - } - while (carry) { - *accp += carry; - if (*accp > (BASE - 1)) { - *accp-- -= BASE; - } else { - carry = 0; - } - } - } -} - -/* Recursive divide and conquer multiply algorithm. - Based on - Let u = u0 + u1*(b^n) - Let v = v0 + v1*(b^n) - Then uv = (B^2n+B^n)*u1*v1 + B^n*(u1-u0)*(v0-v1) + (B^n+1)*u0*v0 - B is the base of storage, number of digits in u1,u0 close to equal. -*/ -static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *prod) -{ - bc_num u0, u1, v0, v1; - bc_num m1, m2, m3; - size_t n; - bool m1zero; - - /* Base case? */ - if ((ulen + vlen) < mul_base_digits - || ulen < MUL_SMALL_DIGITS - || vlen < MUL_SMALL_DIGITS - ) { - _bc_simp_mul(u, ulen, v, vlen, prod); - return; + /* Convert n1 to long[] */ + i = 0; + while (n1len > 0) { + size_t len = MIN(BC_LONGABLE_DIGITS, n1len); + n1_l[i] = bc_partial_convert_to_long(n1end, len); + n1end -= len; + n1len -= len; + i++; } - /* Calculate n -- the u and v split point in digits. */ - n = (MAX(ulen, vlen) + 1) / 2; - - /* Split u and v. */ - if (ulen < n) { - u1 = bc_copy_num(BCG(_zero_)); - u0 = new_sub_num(ulen, 0, u->n_value); - } else { - u1 = new_sub_num(ulen - n, 0, u->n_value); - u0 = new_sub_num(n, 0, u->n_value + ulen - n); - } - if (vlen < n) { - v1 = bc_copy_num(BCG(_zero_)); - v0 = new_sub_num(vlen, 0, v->n_value); - } else { - v1 = new_sub_num(vlen - n, 0, v->n_value); - v0 = new_sub_num(n, 0, v->n_value + vlen - n); + /* Convert n2 to long[] */ + i = 0; + while (n2len > 0) { + size_t len = MIN(BC_LONGABLE_DIGITS, n2len); + n2_l[i] = bc_partial_convert_to_long(n2end, len); + n2end -= len; + n2len -= len; + i++; } - _bc_rm_leading_zeros(u1); - _bc_rm_leading_zeros(u0); - _bc_rm_leading_zeros(v1); - _bc_rm_leading_zeros(v0); - - m1zero = bc_is_zero(u1) || bc_is_zero(v1); - - /* Calculate sub results ... */ - - bc_num d1 = bc_sub(u1, u0, 0); - bc_num d2 = bc_sub(v0, v1, 0); - - /* Do recursive multiplies and shifted adds. */ - if (m1zero) { - m1 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(u1, u1->n_len, v1, v1->n_len, &m1); + /* Multiplication and addition */ + for (i = 0; i < n1_arr_size; i++) { + for (size_t j = 0; j < n2_arr_size; j++) { + prod_l[i + j] += n1_l[i] * n2_l[j]; + } } - if (bc_is_zero(d1) || bc_is_zero(d2)) { - m2 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(d1, d1->n_len, d2, d2->n_len, &m2); + /* + * Move a value exceeding 8 digits by carrying to the next digit. + * However, the last digit does nothing. + */ + for (i = 0; i < prod_arr_size - 1; i++) { + prod_l[i + 1] += prod_l[i] / BC_LONGABLE_OVERFLOW; + prod_l[i] %= BC_LONGABLE_OVERFLOW; } - if (bc_is_zero(u0) || bc_is_zero(v0)) { - m3 = bc_copy_num(BCG(_zero_)); - } else { - _bc_rec_mul(u0, u0->n_len, v0, v0->n_len, &m3); + /* Convert to bc_num */ + *prod = bc_new_num_nonzeroed(prodlen, 0); + char *pptr = (*prod)->n_value; + char *pend = pptr + prodlen - 1; + i = 0; + while (i < prod_arr_size - 1) { + for (size_t j = 0; j < BC_LONGABLE_DIGITS; j++) { + *pend-- = prod_l[i] % BASE; + prod_l[i] /= BASE; + } + i++; } - /* Initialize product */ - *prod = bc_new_num(ulen + vlen + 1, 0); - - if (!m1zero) { - _bc_shift_addsub(*prod, m1, 2 * n, false); - _bc_shift_addsub(*prod, m1, n, false); + /* + * The last digit may carry over. + * Also need to fill it to the end with zeros, so loop until the end of the string. + */ + while (pend >= pptr) { + *pend-- = prod_l[i] % BASE; + prod_l[i] /= BASE; } - _bc_shift_addsub(*prod, m3, n, false); - _bc_shift_addsub(*prod, m3, 0, false); - _bc_shift_addsub(*prod, m2, n, d1->n_sign != d2->n_sign); - - /* Now clean up! */ - bc_free_num (&u1); - bc_free_num (&u0); - bc_free_num (&v1); - bc_free_num (&m1); - bc_free_num (&v0); - bc_free_num (&m2); - bc_free_num (&m3); - bc_free_num (&d1); - bc_free_num (&d2); } /* The multiply routine. N2 times N1 is put int PROD with the scale of @@ -255,26 +172,28 @@ static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *pr bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale) { - bc_num pval; - size_t len1, len2; - size_t full_scale, prod_scale; + bc_num prod; /* Initialize things. */ - len1 = n1->n_len + n1->n_scale; - len2 = n2->n_len + n2->n_scale; - full_scale = n1->n_scale + n2->n_scale; - prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); + size_t len1 = n1->n_len + n1->n_scale; + size_t len2 = n2->n_len + n2->n_scale; + size_t full_scale = n1->n_scale + n2->n_scale; + size_t prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); /* Do the multiply */ - _bc_rec_mul(n1, len1, n2, len2, &pval); + if (len1 <= BC_LONGABLE_DIGITS && len2 <= BC_LONGABLE_DIGITS) { + bc_fast_mul(n1, len1, n2, len2, &prod); + } else { + bc_standard_mul(n1, len1, n2, len2, &prod); + } /* Assign to prod and clean up the number. */ - pval->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS); - pval->n_len = len2 + len1 + 1 - full_scale; - pval->n_scale = prod_scale; - _bc_rm_leading_zeros(pval); - if (bc_is_zero(pval)) { - pval->n_sign = PLUS; + prod->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS); + prod->n_len -= full_scale; + prod->n_scale = prod_scale; + _bc_rm_leading_zeros(prod); + if (bc_is_zero(prod)) { + prod->n_sign = PLUS; } - return pval; + return prod; } From ec9f8dfe5a9be43fee345ff00355e54e73f81d49 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 13 May 2024 18:44:16 +0900 Subject: [PATCH 02/15] Fixed array allocation --- ext/bcmath/libbcmath/src/recmul.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 75ace8a88355a..f676ab4f672f6 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -100,12 +100,9 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; - unsigned long n1_l[n1_arr_size]; - unsigned long n2_l[n2_arr_size]; - unsigned long prod_l[prod_arr_size]; - for (i = 0; i < prod_arr_size; i++) { - prod_l[i] = 0; - } + unsigned long *n1_l = emalloc(n1_arr_size * sizeof(unsigned long)); + unsigned long *n2_l = emalloc(n2_arr_size * sizeof(unsigned long)); + unsigned long *prod_l = ecalloc(prod_arr_size, sizeof(unsigned long)); /* Convert n1 to long[] */ i = 0; @@ -164,6 +161,10 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu *pend-- = prod_l[i] % BASE; prod_l[i] /= BASE; } + + efree(n1_l); + efree(n2_l); + efree(prod_l); } /* The multiply routine. N2 times N1 is put int PROD with the scale of From ac9342df8f5d2cd16d8b214fa8861059b4bdb955 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 13 May 2024 19:28:57 +0900 Subject: [PATCH 03/15] Fixed array allocation --- ext/bcmath/libbcmath/src/recmul.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index f676ab4f672f6..6edf856b5a2df 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -100,9 +100,15 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; - unsigned long *n1_l = emalloc(n1_arr_size * sizeof(unsigned long)); - unsigned long *n2_l = emalloc(n2_arr_size * sizeof(unsigned long)); - unsigned long *prod_l = ecalloc(prod_arr_size, sizeof(unsigned long)); + unsigned long *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(unsigned long)); + + unsigned long *n1_l = buf; + unsigned long *n2_l = buf + n1_arr_size; + unsigned long *prod_l = n2_l + n2_arr_size; + + for (i = 0; i < prod_arr_size; i++) { + prod_l[i] = 0; + } /* Convert n1 to long[] */ i = 0; @@ -162,9 +168,7 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu prod_l[i] /= BASE; } - efree(n1_l); - efree(n2_l); - efree(prod_l); + efree(buf); } /* The multiply routine. N2 times N1 is put int PROD with the scale of From 899bb39ee2f209d348fe47cf40dea10d4261e6ba Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Mon, 13 May 2024 19:32:10 +0900 Subject: [PATCH 04/15] Changed unsigned long to BC_UINT_T --- ext/bcmath/libbcmath/src/recmul.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 6edf856b5a2df..e457a7333e132 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -43,10 +43,10 @@ * Converts BCD to long, going backwards from pointer n by the number of * characters specified by len. */ -static inline unsigned long bc_partial_convert_to_long(const char *n, size_t len) +static inline BC_UINT_T bc_partial_convert_to_long(const char *n, size_t len) { - unsigned long num = 0; - unsigned long base = 1; + BC_UINT_T num = 0; + BC_UINT_T base = 1; for (size_t i = 0; i < len; i++) { num += *n * base; @@ -66,9 +66,9 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc char *n1end = n1->n_value + n1len - 1; char *n2end = n2->n_value + n2len - 1; - unsigned long n1_l = bc_partial_convert_to_long(n1end, n1len); - unsigned long n2_l = bc_partial_convert_to_long(n2end, n2len); - unsigned long prod_l = n1_l * n2_l; + BC_UINT_T n1_l = bc_partial_convert_to_long(n1end, n1len); + BC_UINT_T n2_l = bc_partial_convert_to_long(n2end, n2len); + BC_UINT_T prod_l = n1_l * n2_l; size_t prodlen = n1len + n2len; *prod = bc_new_num_nonzeroed(prodlen, 0); @@ -82,7 +82,7 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc } /* - * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of unsigned longs. + * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_UINT_Ts. * The array is generated starting with the smaller digits. * e.g. 12345678901234567890 => {34567890, 56789012, 1234} * @@ -100,11 +100,11 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; - unsigned long *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(unsigned long)); + BC_UINT_T *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(BC_UINT_T)); - unsigned long *n1_l = buf; - unsigned long *n2_l = buf + n1_arr_size; - unsigned long *prod_l = n2_l + n2_arr_size; + BC_UINT_T *n1_l = buf; + BC_UINT_T *n2_l = buf + n1_arr_size; + BC_UINT_T *prod_l = n2_l + n2_arr_size; for (i = 0; i < prod_arr_size; i++) { prod_l[i] = 0; From ea57a9f3c8a94372065aa3d82cb06cdd727c415f Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Tue, 14 May 2024 08:21:32 +0900 Subject: [PATCH 05/15] Fixed comments and constant names --- ext/bcmath/libbcmath/src/private.h | 8 ++++---- ext/bcmath/libbcmath/src/recmul.c | 32 +++++++++++++++--------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h index fa538150ef73a..7e367441292ee 100644 --- a/ext/bcmath/libbcmath/src/private.h +++ b/ext/bcmath/libbcmath/src/private.h @@ -84,13 +84,13 @@ static inline uint64_t BC_BSWAP64(uint64_t u) #if SIZEOF_SIZE_T >= 8 # define BC_BSWAP(u) BC_BSWAP64(u) # define BC_UINT_T uint64_t -# define BC_LONGABLE_DIGITS 8 -# define BC_LONGABLE_OVERFLOW 100000000 +# define BC_MUL_UINT_DIGITS 8 +# define BC_MUL_UINT_OVERFLOW 100000000 #else # define BC_BSWAP(u) BC_BSWAP32(u) # define BC_UINT_T uint32_t -# define BC_LONGABLE_DIGITS 4 -# define BC_LONGABLE_OVERFLOW 10000 +# define BC_MUL_UINT_DIGITS 4 +# define BC_MUL_UINT_OVERFLOW 10000 #endif #ifdef WORDS_BIGENDIAN diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index e457a7333e132..0d916747e0e37 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -40,10 +40,10 @@ /* Multiply utility routines */ /* - * Converts BCD to long, going backwards from pointer n by the number of + * Converts BCD to uint, going backwards from pointer n by the number of * characters specified by len. */ -static inline BC_UINT_T bc_partial_convert_to_long(const char *n, size_t len) +static inline BC_UINT_T bc_partial_convert_to_uint(const char *n, size_t len) { BC_UINT_T num = 0; BC_UINT_T base = 1; @@ -66,8 +66,8 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc char *n1end = n1->n_value + n1len - 1; char *n2end = n2->n_value + n2len - 1; - BC_UINT_T n1_l = bc_partial_convert_to_long(n1end, n1len); - BC_UINT_T n2_l = bc_partial_convert_to_long(n2end, n2len); + BC_UINT_T n1_l = bc_partial_convert_to_uint(n1end, n1len); + BC_UINT_T n2_l = bc_partial_convert_to_uint(n2end, n2len); BC_UINT_T prod_l = n1_l * n2_l; size_t prodlen = n1len + n2len; @@ -96,8 +96,8 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu char *n2end = n2->n_value + n2len - 1; size_t prodlen = n1len + n2len; - size_t n1_arr_size = n1len / BC_LONGABLE_DIGITS + (n1len % BC_LONGABLE_DIGITS ? 1 : 0); - size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0); + size_t n1_arr_size = n1len / BC_MUL_UINT_DIGITS + (n1len % BC_MUL_UINT_DIGITS ? 1 : 0); + size_t n2_arr_size = n2len / BC_MUL_UINT_DIGITS + (n2len % BC_MUL_UINT_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; BC_UINT_T *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(BC_UINT_T)); @@ -110,21 +110,21 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu prod_l[i] = 0; } - /* Convert n1 to long[] */ + /* Convert n1 to uint[] */ i = 0; while (n1len > 0) { - size_t len = MIN(BC_LONGABLE_DIGITS, n1len); - n1_l[i] = bc_partial_convert_to_long(n1end, len); + size_t len = MIN(BC_MUL_UINT_DIGITS, n1len); + n1_l[i] = bc_partial_convert_to_uint(n1end, len); n1end -= len; n1len -= len; i++; } - /* Convert n2 to long[] */ + /* Convert n2 to uint[] */ i = 0; while (n2len > 0) { - size_t len = MIN(BC_LONGABLE_DIGITS, n2len); - n2_l[i] = bc_partial_convert_to_long(n2end, len); + size_t len = MIN(BC_MUL_UINT_DIGITS, n2len); + n2_l[i] = bc_partial_convert_to_uint(n2end, len); n2end -= len; n2len -= len; i++; @@ -142,8 +142,8 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu * However, the last digit does nothing. */ for (i = 0; i < prod_arr_size - 1; i++) { - prod_l[i + 1] += prod_l[i] / BC_LONGABLE_OVERFLOW; - prod_l[i] %= BC_LONGABLE_OVERFLOW; + prod_l[i + 1] += prod_l[i] / BC_MUL_UINT_OVERFLOW; + prod_l[i] %= BC_MUL_UINT_OVERFLOW; } /* Convert to bc_num */ @@ -152,7 +152,7 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu char *pend = pptr + prodlen - 1; i = 0; while (i < prod_arr_size - 1) { - for (size_t j = 0; j < BC_LONGABLE_DIGITS; j++) { + for (size_t j = 0; j < BC_MUL_UINT_DIGITS; j++) { *pend-- = prod_l[i] % BASE; prod_l[i] /= BASE; } @@ -186,7 +186,7 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale) size_t prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale))); /* Do the multiply */ - if (len1 <= BC_LONGABLE_DIGITS && len2 <= BC_LONGABLE_DIGITS) { + if (len1 <= BC_MUL_UINT_DIGITS && len2 <= BC_MUL_UINT_DIGITS) { bc_fast_mul(n1, len1, n2, len2, &prod); } else { bc_standard_mul(n1, len1, n2, len2, &prod); From b23e0a392978fd37658a06cbc37feededc1e19fb Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Tue, 14 May 2024 08:22:55 +0900 Subject: [PATCH 06/15] Moved the constant --- ext/bcmath/libbcmath/src/private.h | 4 ---- ext/bcmath/libbcmath/src/recmul.c | 9 +++++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h index 7e367441292ee..1403baad0c257 100644 --- a/ext/bcmath/libbcmath/src/private.h +++ b/ext/bcmath/libbcmath/src/private.h @@ -84,13 +84,9 @@ static inline uint64_t BC_BSWAP64(uint64_t u) #if SIZEOF_SIZE_T >= 8 # define BC_BSWAP(u) BC_BSWAP64(u) # define BC_UINT_T uint64_t -# define BC_MUL_UINT_DIGITS 8 -# define BC_MUL_UINT_OVERFLOW 100000000 #else # define BC_BSWAP(u) BC_BSWAP32(u) # define BC_UINT_T uint32_t -# define BC_MUL_UINT_DIGITS 4 -# define BC_MUL_UINT_OVERFLOW 10000 #endif #ifdef WORDS_BIGENDIAN diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 0d916747e0e37..247e2a238cc53 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -37,6 +37,15 @@ #include "zend_alloc.h" +#if SIZEOF_SIZE_T >= 8 +# define BC_MUL_UINT_DIGITS 8 +# define BC_MUL_UINT_OVERFLOW 100000000 +#else +# define BC_MUL_UINT_DIGITS 4 +# define BC_MUL_UINT_OVERFLOW 10000 +#endif + + /* Multiply utility routines */ /* From ec51d7664e8516828851b8a312436f87245b24d9 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Tue, 14 May 2024 09:12:55 +0900 Subject: [PATCH 07/15] Fixed variable names and comments --- ext/bcmath/libbcmath/src/recmul.c | 38 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 247e2a238cc53..9a832d754c459 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -75,9 +75,9 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc char *n1end = n1->n_value + n1len - 1; char *n2end = n2->n_value + n2len - 1; - BC_UINT_T n1_l = bc_partial_convert_to_uint(n1end, n1len); - BC_UINT_T n2_l = bc_partial_convert_to_uint(n2end, n2len); - BC_UINT_T prod_l = n1_l * n2_l; + BC_UINT_T n1_uint = bc_partial_convert_to_uint(n1end, n1len); + BC_UINT_T n2_uint = bc_partial_convert_to_uint(n2end, n2len); + BC_UINT_T prod_uint = n1_uint * n2_uint; size_t prodlen = n1len + n2len; *prod = bc_new_num_nonzeroed(prodlen, 0); @@ -85,8 +85,8 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc char *pend = pptr + prodlen - 1; while (pend >= pptr) { - *pend-- = prod_l % BASE; - prod_l /= BASE; + *pend-- = prod_uint % BASE; + prod_uint /= BASE; } } @@ -111,19 +111,19 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu BC_UINT_T *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(BC_UINT_T)); - BC_UINT_T *n1_l = buf; - BC_UINT_T *n2_l = buf + n1_arr_size; - BC_UINT_T *prod_l = n2_l + n2_arr_size; + BC_UINT_T *n1_uint = buf; + BC_UINT_T *n2_uint = buf + n1_arr_size; + BC_UINT_T *prod_uint = n2_uint + n2_arr_size; for (i = 0; i < prod_arr_size; i++) { - prod_l[i] = 0; + prod_uint[i] = 0; } /* Convert n1 to uint[] */ i = 0; while (n1len > 0) { size_t len = MIN(BC_MUL_UINT_DIGITS, n1len); - n1_l[i] = bc_partial_convert_to_uint(n1end, len); + n1_uint[i] = bc_partial_convert_to_uint(n1end, len); n1end -= len; n1len -= len; i++; @@ -133,7 +133,7 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu i = 0; while (n2len > 0) { size_t len = MIN(BC_MUL_UINT_DIGITS, n2len); - n2_l[i] = bc_partial_convert_to_uint(n2end, len); + n2_uint[i] = bc_partial_convert_to_uint(n2end, len); n2end -= len; n2len -= len; i++; @@ -142,17 +142,17 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu /* Multiplication and addition */ for (i = 0; i < n1_arr_size; i++) { for (size_t j = 0; j < n2_arr_size; j++) { - prod_l[i + j] += n1_l[i] * n2_l[j]; + prod_uint[i + j] += n1_uint[i] * n2_uint[j]; } } /* - * Move a value exceeding 8 digits by carrying to the next digit. + * Move a value exceeding 4/8 digits by carrying to the next digit. * However, the last digit does nothing. */ for (i = 0; i < prod_arr_size - 1; i++) { - prod_l[i + 1] += prod_l[i] / BC_MUL_UINT_OVERFLOW; - prod_l[i] %= BC_MUL_UINT_OVERFLOW; + prod_uint[i + 1] += prod_uint[i] / BC_MUL_UINT_OVERFLOW; + prod_uint[i] %= BC_MUL_UINT_OVERFLOW; } /* Convert to bc_num */ @@ -162,8 +162,8 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu i = 0; while (i < prod_arr_size - 1) { for (size_t j = 0; j < BC_MUL_UINT_DIGITS; j++) { - *pend-- = prod_l[i] % BASE; - prod_l[i] /= BASE; + *pend-- = prod_uint[i] % BASE; + prod_uint[i] /= BASE; } i++; } @@ -173,8 +173,8 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu * Also need to fill it to the end with zeros, so loop until the end of the string. */ while (pend >= pptr) { - *pend-- = prod_l[i] % BASE; - prod_l[i] /= BASE; + *pend-- = prod_uint[i] % BASE; + prod_uint[i] /= BASE; } efree(buf); From 7e82dc1f6b0a0a1ccfd8463a622bbbe8be60735a Mon Sep 17 00:00:00 2001 From: Saki Takamachi <34942839+SakiTakamachi@users.noreply.github.com> Date: Wed, 15 May 2024 08:08:10 +0900 Subject: [PATCH 08/15] Fixed array size calculation Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- ext/bcmath/libbcmath/src/recmul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 9a832d754c459..e0e5cee3e198d 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -105,7 +105,7 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu char *n2end = n2->n_value + n2len - 1; size_t prodlen = n1len + n2len; - size_t n1_arr_size = n1len / BC_MUL_UINT_DIGITS + (n1len % BC_MUL_UINT_DIGITS ? 1 : 0); + size_t n1_arr_size = (n1len + BC_MUL_UINT_DIGITS - 1) / BC_MUL_UINT_DIGITS; size_t n2_arr_size = n2len / BC_MUL_UINT_DIGITS + (n2len % BC_MUL_UINT_DIGITS ? 1 : 0); size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; From 25b33bd32cebb0ac6d1596683960a102db7c18f7 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Wed, 15 May 2024 09:17:50 +0900 Subject: [PATCH 09/15] address comments --- ext/bcmath/libbcmath/src/recmul.c | 38 ++++++++++++++----------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index e0e5cee3e198d..c21551b01d217 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -66,6 +66,18 @@ static inline BC_UINT_T bc_partial_convert_to_uint(const char *n, size_t len) return num; } +static inline void bc_convert_to_uint(BC_UINT_T *n_uint, char *nend, size_t nlen) +{ + size_t i = 0; + while (nlen > 0) { + size_t len = MIN(BC_MUL_UINT_DIGITS, nlen); + n_uint[i] = bc_partial_convert_to_uint(nend, len); + nend -= len; + nlen -= len; + i++; + } +} + /* * If the n_values ​​of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less, * the calculation will be performed at high speed without using an array. @@ -106,10 +118,10 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu size_t prodlen = n1len + n2len; size_t n1_arr_size = (n1len + BC_MUL_UINT_DIGITS - 1) / BC_MUL_UINT_DIGITS; - size_t n2_arr_size = n2len / BC_MUL_UINT_DIGITS + (n2len % BC_MUL_UINT_DIGITS ? 1 : 0); + size_t n2_arr_size = (n2len + BC_MUL_UINT_DIGITS - 1) / BC_MUL_UINT_DIGITS; size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; - BC_UINT_T *buf = emalloc((n1_arr_size + n2_arr_size + prod_arr_size) * sizeof(BC_UINT_T)); + BC_UINT_T *buf = safe_emalloc(n1_arr_size + n2_arr_size + prod_arr_size, sizeof(BC_UINT_T), 0); BC_UINT_T *n1_uint = buf; BC_UINT_T *n2_uint = buf + n1_arr_size; @@ -119,25 +131,9 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_nu prod_uint[i] = 0; } - /* Convert n1 to uint[] */ - i = 0; - while (n1len > 0) { - size_t len = MIN(BC_MUL_UINT_DIGITS, n1len); - n1_uint[i] = bc_partial_convert_to_uint(n1end, len); - n1end -= len; - n1len -= len; - i++; - } - - /* Convert n2 to uint[] */ - i = 0; - while (n2len > 0) { - size_t len = MIN(BC_MUL_UINT_DIGITS, n2len); - n2_uint[i] = bc_partial_convert_to_uint(n2end, len); - n2end -= len; - n2len -= len; - i++; - } + /* Convert to uint[] */ + bc_convert_to_uint(n1_uint, n1end, n1len); + bc_convert_to_uint(n2_uint, n2end, n2len); /* Multiplication and addition */ for (i = 0; i < n1_arr_size; i++) { From 888ded69e4aff990f5fbb51e17548df255d0acf2 Mon Sep 17 00:00:00 2001 From: Saki Takamachi <34942839+SakiTakamachi@users.noreply.github.com> Date: Wed, 15 May 2024 19:36:11 +0900 Subject: [PATCH 10/15] nend to const Co-authored-by: Gina Peter Banyard --- ext/bcmath/libbcmath/src/recmul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index c21551b01d217..fd8680fd6a841 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -66,7 +66,7 @@ static inline BC_UINT_T bc_partial_convert_to_uint(const char *n, size_t len) return num; } -static inline void bc_convert_to_uint(BC_UINT_T *n_uint, char *nend, size_t nlen) +static inline void bc_convert_to_uint(BC_UINT_T *n_uint, const char *nend, size_t nlen) { size_t i = 0; while (nlen > 0) { From aa1a677d547580e00be778a36b087fe3460606cb Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Wed, 15 May 2024 19:43:50 +0900 Subject: [PATCH 11/15] n1end and n2end to const --- ext/bcmath/libbcmath/src/recmul.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index fd8680fd6a841..36083bc258dd3 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -113,8 +113,8 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) { size_t i; - char *n1end = n1->n_value + n1len - 1; - char *n2end = n2->n_value + n2len - 1; + const char *n1end = n1->n_value + n1len - 1; + const char *n2end = n2->n_value + n2len - 1; size_t prodlen = n1len + n2len; size_t n1_arr_size = (n1len + BC_MUL_UINT_DIGITS - 1) / BC_MUL_UINT_DIGITS; From dd401bdea96dd391717ab0646186d742ce1b2004 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Wed, 15 May 2024 23:51:52 +0900 Subject: [PATCH 12/15] n1end and n2end to const --- ext/bcmath/libbcmath/src/recmul.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 36083bc258dd3..4afd1344f3872 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -84,8 +84,8 @@ static inline void bc_convert_to_uint(BC_UINT_T *n_uint, const char *nend, size_ */ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) { - char *n1end = n1->n_value + n1len - 1; - char *n2end = n2->n_value + n2len - 1; + const char *n1end = n1->n_value + n1len - 1; + const char *n2end = n2->n_value + n2len - 1; BC_UINT_T n1_uint = bc_partial_convert_to_uint(n1end, n1len); BC_UINT_T n2_uint = bc_partial_convert_to_uint(n2end, n2len); From c7c977a5ecbfe1e5481c056c2189811e5a982e12 Mon Sep 17 00:00:00 2001 From: Saki Takamachi <34942839+SakiTakamachi@users.noreply.github.com> Date: Thu, 16 May 2024 07:44:10 +0900 Subject: [PATCH 13/15] fixed type Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- ext/bcmath/libbcmath/src/recmul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 4afd1344f3872..d4f81d7419e04 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -110,7 +110,7 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc * Multiply and add these groups of numbers to perform multiplication fast. * How much to shift the digits when adding values ​​can be calculated from the index of the array. */ -static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) +static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc_num *prod) { size_t i; const char *n1end = n1->n_value + n1len - 1; From dab7e153508a8c419d1495ff9286d72d0cae3ce6 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Thu, 16 May 2024 07:46:30 +0900 Subject: [PATCH 14/15] rm zero space --- ext/bcmath/libbcmath/src/recmul.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index d4f81d7419e04..573ba3dd9e59c 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -79,7 +79,7 @@ static inline void bc_convert_to_uint(BC_UINT_T *n_uint, const char *nend, size_ } /* - * If the n_values ​​of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less, + * If the n_values of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less, * the calculation will be performed at high speed without using an array. */ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod) @@ -108,7 +108,7 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc * e.g. 12345678901234567890 => {34567890, 56789012, 1234} * * Multiply and add these groups of numbers to perform multiplication fast. - * How much to shift the digits when adding values ​​can be calculated from the index of the array. + * How much to shift the digits when adding values can be calculated from the index of the array. */ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc_num *prod) { From 48b96dfb1bf5a91fd6fbca3742650fb9d6f5d6a4 Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Thu, 16 May 2024 07:50:25 +0900 Subject: [PATCH 15/15] added comment --- ext/bcmath/libbcmath/src/recmul.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 573ba3dd9e59c..bfd909f251022 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -121,6 +121,11 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc size_t n2_arr_size = (n2len + BC_MUL_UINT_DIGITS - 1) / BC_MUL_UINT_DIGITS; size_t prod_arr_size = n1_arr_size + n2_arr_size - 1; + /* + * let's say that N is the max of n1len and n2len (and a multiple of BC_MUL_UINT_DIGITS for simplicity), + * then this sum is <= N/BC_MUL_UINT_DIGITS + N/BC_MUL_UINT_DIGITS + N/BC_MUL_UINT_DIGITS + N/BC_MUL_UINT_DIGITS - 1 + * which is equal to N - 1 if BC_MUL_UINT_DIGITS is 4, and N/2 - 1 if BC_MUL_UINT_DIGITS is 8. + */ BC_UINT_T *buf = safe_emalloc(n1_arr_size + n2_arr_size + prod_arr_size, sizeof(BC_UINT_T), 0); BC_UINT_T *n1_uint = buf;