Skip to content

ext/bcmath: bcpow() performance improvement #15790

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ext/bcmath/libbcmath/src/bcmath.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,14 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale);
*(result) = mul_ex; \
} while (0)

bc_num bc_square(bc_num n1, size_t scale);

#define bc_square_ex(n1, result, scale_min) do { \
bc_num square_ex = bc_square(n1, scale_min); \
bc_free_num (result); \
*(result) = square_ex; \
} while (0)

bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, size_t scale);

bool bc_modulo(bc_num num1, bc_num num2, bc_num *resul, size_t scale);
Expand Down
4 changes: 2 additions & 2 deletions ext/bcmath/libbcmath/src/raise.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
pwrscale = num1->n_scale;
while ((exponent & 1) == 0) {
pwrscale = 2 * pwrscale;
bc_multiply_ex(power, power, &power, pwrscale);
bc_square_ex(power, &power, pwrscale);
exponent = exponent >> 1;
}
temp = bc_copy_num(power);
Expand All @@ -79,7 +79,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
/* Do the calculation. */
while (exponent > 0) {
pwrscale = 2 * pwrscale;
bc_multiply_ex(power, power, &power, pwrscale);
bc_square_ex(power, &power, pwrscale);
if ((exponent & 1) == 1) {
calcscale = pwrscale + calcscale;
bc_multiply_ex(temp, power, &temp, calcscale);
Expand Down
120 changes: 119 additions & 1 deletion ext/bcmath/libbcmath/src/recmul.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len,
}
}

static inline void bc_fast_square(bc_num n1, size_t n1len, bc_num *prod)
{
const char *n1end = n1->n_value + n1len - 1;

BC_VECTOR n1_vector = bc_partial_convert_to_vector(n1end, n1len);
BC_VECTOR prod_vector = n1_vector * n1_vector;

size_t prodlen = n1len + n1len;
*prod = bc_new_num_nonzeroed(prodlen, 0);
char *pptr = (*prod)->n_value;
char *pend = pptr + prodlen - 1;

while (pend >= pptr) {
*pend-- = prod_vector % BASE;
prod_vector /= BASE;
}
}

/*
* Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_VECTOR.
* The array is generated starting with the smaller digits.
Expand Down Expand Up @@ -163,7 +181,82 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc
efree(buf);
}

/* The multiply routine. N2 times N1 is put int PROD with the scale of
/** This is bc_standard_mul implementation for square */
static void bc_standard_square(bc_num n1, size_t n1len, bc_num *prod)
{
size_t i;
const char *n1end = n1->n_value + n1len - 1;
size_t prodlen = n1len + n1len;

size_t n1_arr_size = (n1len + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
size_t prod_arr_size = (prodlen + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;

BC_VECTOR *buf = safe_emalloc(n1_arr_size + n1_arr_size + prod_arr_size, sizeof(BC_VECTOR), 0);

BC_VECTOR *n1_vector = buf;
BC_VECTOR *prod_vector = n1_vector + n1_arr_size + n1_arr_size;

for (i = 0; i < prod_arr_size; i++) {
prod_vector[i] = 0;
}

/* Convert to BC_VECTOR[] */
bc_convert_to_vector(n1_vector, n1end, n1len);

/* Multiplication and addition */
size_t count = 0;
for (i = 0; i < n1_arr_size; i++) {
/*
* This calculation adds the result multiple times to the array entries.
* When multiplying large numbers of digits, there is a possibility of
* overflow, so digit adjustment is performed beforehand.
*/
if (UNEXPECTED(count >= BC_VECTOR_NO_OVERFLOW_ADD_COUNT)) {
bc_mul_carry_calc(prod_vector, prod_arr_size);
count = 0;
}
count++;
for (size_t j = 0; j < n1_arr_size; j++) {
prod_vector[i + j] += n1_vector[i] * n1_vector[j];
}
}

/*
* Move a value exceeding 4/8 digits by carrying to the next digit.
* However, the last digit does nothing.
*/
bc_mul_carry_calc(prod_vector, prod_arr_size);

/* Convert to bc_num */
*prod = bc_new_num_nonzeroed(prodlen, 0);
char *pptr = (*prod)->n_value;
char *pend = pptr + prodlen - 1;
i = 0;
while (i < prod_arr_size - 1) {
#if BC_VECTOR_SIZE == 4
bc_write_bcd_representation(prod_vector[i], pend - 3);
pend -= 4;
#else
bc_write_bcd_representation(prod_vector[i] / 10000, pend - 7);
bc_write_bcd_representation(prod_vector[i] % 10000, pend - 3);
pend -= 8;
#endif
i++;
}

/*
* The last digit may carry over.
* Also need to fill it to the end with zeros, so loop until the end of the string.
*/
while (pend >= pptr) {
*pend-- = prod_vector[i] % BASE;
prod_vector[i] /= BASE;
}

efree(buf);
}

/* The multiply routine. N2 times N1 is put int PROD with the scale of
the result being MIN(N2 scale+N1 scale, MAX (SCALE, N2 scale, N1 scale)).
*/

Expand Down Expand Up @@ -194,3 +287,28 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale)
}
return prod;
}

bc_num bc_square(bc_num n1, size_t scale)
{
bc_num prod;

/* Initialize things. */
size_t len1 = n1->n_len + n1->n_scale;
size_t full_scale = n1->n_scale + n1->n_scale;
size_t prod_scale = MIN(full_scale, MAX(scale, n1->n_scale));

/* Do the square */
if (len1 <= BC_VECTOR_SIZE) {
bc_fast_square(n1, len1, &prod);
} else {
bc_standard_square(n1, len1, &prod);
}

/* Assign to prod and clean up the number. */
prod->n_sign = PLUS;
prod->n_len -= full_scale;
prod->n_scale = prod_scale;
_bc_rm_leading_zeros(prod);

return prod;
}
Loading