Skip to content

ext/bcmath: bcpow() performance improvement #15790

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ext/bcmath/libbcmath/src/bcmath.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale);
*(result) = mul_ex; \
} while (0)

bc_num bc_square(bc_num n1, size_t scale);

bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, size_t scale);

bool bc_modulo(bc_num num1, bc_num num2, bc_num *resul, size_t scale);
Expand Down
16 changes: 9 additions & 7 deletions ext/bcmath/libbcmath/src/raise.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@
#include <stdbool.h>
#include <stddef.h>

void bc_square_ex(bc_num n1, bc_num *result, size_t scale_min) {
bc_num square_ex = bc_square(n1, scale_min);
bc_free_num(result);
*(result) = square_ex;
}

/* Raise NUM1 to the NUM2 power. The result is placed in RESULT.
Maximum exponent is LONG_MAX. If a NUM2 is not an integer,
only the integer part is used. */

void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
{
void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale) {
bc_num temp, power;
size_t rscale;
size_t pwrscale;
Expand Down Expand Up @@ -69,7 +72,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
pwrscale = num1->n_scale;
while ((exponent & 1) == 0) {
pwrscale = 2 * pwrscale;
bc_multiply_ex(power, power, &power, pwrscale);
bc_square_ex(power, &power, pwrscale);
exponent = exponent >> 1;
}
temp = bc_copy_num(power);
Expand All @@ -79,7 +82,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
/* Do the calculation. */
while (exponent > 0) {
pwrscale = 2 * pwrscale;
bc_multiply_ex(power, power, &power, pwrscale);
bc_square_ex(power, &power, pwrscale);
if ((exponent & 1) == 1) {
calcscale = pwrscale + calcscale;
bc_multiply_ex(temp, power, &temp, calcscale);
Expand All @@ -102,8 +105,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
}

/* This is used internally by BCMath */
void bc_raise_bc_exponent(bc_num base, bc_num expo, bc_num *result, size_t scale)
{
void bc_raise_bc_exponent(bc_num base, bc_num expo, bc_num *result, size_t scale) {
/* Exponent must not have fractional part */
assert(expo->n_scale == 0);

Expand Down
151 changes: 123 additions & 28 deletions ext/bcmath/libbcmath/src/recmul.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,64 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len,
}
}

/*
* Equivalent of bc_fast_mul for small numbers to perform computations
* without using array.
*/
static inline void bc_fast_square(bc_num n1, size_t n1len, bc_num *prod)
{
const char *n1end = n1->n_value + n1len - 1;

BC_VECTOR n1_vector = bc_partial_convert_to_vector(n1end, n1len);
BC_VECTOR prod_vector = n1_vector * n1_vector;

size_t prodlen = n1len + n1len;
*prod = bc_new_num_nonzeroed(prodlen, 0);
char *pptr = (*prod)->n_value;
char *pend = pptr + prodlen - 1;

while (pend >= pptr) {
*pend-- = prod_vector % BASE;
prod_vector /= BASE;
}
}

/* Common part of functions bc_standard_mul and bc_standard_square
* that takes a vector and converts it to a bc_num */
static inline void bc_mul_finish_from_vector(BC_VECTOR *prod_vector, size_t prod_arr_size, size_t prodlen, bc_num *prod) {
/*
* Move a value exceeding 4/8 digits by carrying to the next digit.
* However, the last digit does nothing.
*/
bc_mul_carry_calc(prod_vector, prod_arr_size);

/* Convert to bc_num */
*prod = bc_new_num_nonzeroed(prodlen, 0);
char *pptr = (*prod)->n_value;
char *pend = pptr + prodlen - 1;
size_t i = 0;
while (i < prod_arr_size - 1) {
#if BC_VECTOR_SIZE == 4
bc_write_bcd_representation(prod_vector[i], pend - 3);
pend -= 4;
#else
bc_write_bcd_representation(prod_vector[i] / 10000, pend - 7);
bc_write_bcd_representation(prod_vector[i] % 10000, pend - 3);
pend -= 8;
#endif
i++;
}

/*
* The last digit may carry over.
* Also need to fill it to the end with zeros, so loop until the end of the string.
*/
while (pend >= pptr) {
*pend-- = prod_vector[i] % BASE;
prod_vector[i] /= BASE;
}
}

/*
* Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_VECTOR.
* The array is generated starting with the smaller digits.
Expand Down Expand Up @@ -128,42 +186,57 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc
}
}

/*
* Move a value exceeding 4/8 digits by carrying to the next digit.
* However, the last digit does nothing.
*/
bc_mul_carry_calc(prod_vector, prod_arr_size);
bc_mul_finish_from_vector(prod_vector, prod_arr_size, prodlen, prod);

/* Convert to bc_num */
*prod = bc_new_num_nonzeroed(prodlen, 0);
char *pptr = (*prod)->n_value;
char *pend = pptr + prodlen - 1;
i = 0;
while (i < prod_arr_size - 1) {
#if BC_VECTOR_SIZE == 4
bc_write_bcd_representation(prod_vector[i], pend - 3);
pend -= 4;
#else
bc_write_bcd_representation(prod_vector[i] / 10000, pend - 7);
bc_write_bcd_representation(prod_vector[i] % 10000, pend - 3);
pend -= 8;
#endif
i++;
efree(buf);
}

/** This is bc_standard_mul implementation for square */
static void bc_standard_square(bc_num n1, size_t n1len, bc_num *prod)
{
size_t i;
const char *n1end = n1->n_value + n1len - 1;
size_t prodlen = n1len + n1len;

size_t n1_arr_size = (n1len + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
size_t prod_arr_size = (prodlen + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;

BC_VECTOR *buf = safe_emalloc(n1_arr_size + n1_arr_size + prod_arr_size, sizeof(BC_VECTOR), 0);

BC_VECTOR *n1_vector = buf;
BC_VECTOR *prod_vector = n1_vector + n1_arr_size + n1_arr_size;

for (i = 0; i < prod_arr_size; i++) {
prod_vector[i] = 0;
}

/*
* The last digit may carry over.
* Also need to fill it to the end with zeros, so loop until the end of the string.
*/
while (pend >= pptr) {
*pend-- = prod_vector[i] % BASE;
prod_vector[i] /= BASE;
/* Convert to BC_VECTOR[] */
bc_convert_to_vector(n1_vector, n1end, n1len);

/* Multiplication and addition */
size_t count = 0;
for (i = 0; i < n1_arr_size; i++) {
/*
* This calculation adds the result multiple times to the array entries.
* When multiplying large numbers of digits, there is a possibility of
* overflow, so digit adjustment is performed beforehand.
*/
if (UNEXPECTED(count >= BC_VECTOR_NO_OVERFLOW_ADD_COUNT)) {
bc_mul_carry_calc(prod_vector, prod_arr_size);
count = 0;
}
count++;
for (size_t j = 0; j < n1_arr_size; j++) {
prod_vector[i + j] += n1_vector[i] * n1_vector[j];
}
}

bc_mul_finish_from_vector(prod_vector, prod_arr_size, prodlen, prod);

efree(buf);
}

/* The multiply routine. N2 times N1 is put int PROD with the scale of
/* The multiply routine. N2 times N1 is put int PROD with the scale of
the result being MIN(N2 scale+N1 scale, MAX (SCALE, N2 scale, N1 scale)).
*/

Expand Down Expand Up @@ -194,3 +267,25 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale)
}
return prod;
}

bc_num bc_square(bc_num n1, size_t scale)
{
bc_num prod;

size_t len1 = n1->n_len + n1->n_scale;
size_t full_scale = n1->n_scale + n1->n_scale;
size_t prod_scale = MIN(full_scale, MAX(scale, n1->n_scale));

if (len1 <= BC_VECTOR_SIZE) {
bc_fast_square(n1, len1, &prod);
} else {
bc_standard_square(n1, len1, &prod);
}

prod->n_sign = PLUS;
prod->n_len -= full_scale;
prod->n_scale = prod_scale;
_bc_rm_leading_zeros(prod);

return prod;
}
Loading