Skip to content

Commit 306dedc

Browse files
authored
ext/bcmath: bcpow() performance improvement (php#15790)
* Added function for squaring to improve performance of power calculation * Aligned backslashes * Removed unnecessary comments * Extracted common part of multiplication and square functions * Added comment to bc_fast_square * Improved wording of bc_mul_finish_from_vector * Reused new function name * Replaced macro with function
1 parent 36dfe63 commit 306dedc

File tree

3 files changed

+134
-35
lines changed

3 files changed

+134
-35
lines changed

ext/bcmath/libbcmath/src/bcmath.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale);
150150
*(result) = mul_ex; \
151151
} while (0)
152152

153+
bc_num bc_square(bc_num n1, size_t scale);
154+
153155
bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, size_t scale);
154156

155157
bool bc_modulo(bc_num num1, bc_num num2, bc_num *resul, size_t scale);

ext/bcmath/libbcmath/src/raise.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,16 @@
3434
#include <stdbool.h>
3535
#include <stddef.h>
3636

37+
void bc_square_ex(bc_num n1, bc_num *result, size_t scale_min) {
38+
bc_num square_ex = bc_square(n1, scale_min);
39+
bc_free_num(result);
40+
*(result) = square_ex;
41+
}
3742

3843
/* Raise NUM1 to the NUM2 power. The result is placed in RESULT.
3944
Maximum exponent is LONG_MAX. If a NUM2 is not an integer,
4045
only the integer part is used. */
41-
42-
void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
43-
{
46+
void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale) {
4447
bc_num temp, power;
4548
size_t rscale;
4649
size_t pwrscale;
@@ -69,7 +72,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
6972
pwrscale = num1->n_scale;
7073
while ((exponent & 1) == 0) {
7174
pwrscale = 2 * pwrscale;
72-
bc_multiply_ex(power, power, &power, pwrscale);
75+
bc_square_ex(power, &power, pwrscale);
7376
exponent = exponent >> 1;
7477
}
7578
temp = bc_copy_num(power);
@@ -79,7 +82,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
7982
/* Do the calculation. */
8083
while (exponent > 0) {
8184
pwrscale = 2 * pwrscale;
82-
bc_multiply_ex(power, power, &power, pwrscale);
85+
bc_square_ex(power, &power, pwrscale);
8386
if ((exponent & 1) == 1) {
8487
calcscale = pwrscale + calcscale;
8588
bc_multiply_ex(temp, power, &temp, calcscale);
@@ -100,8 +103,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
100103
}
101104

102105
/* This is used internally by BCMath */
103-
void bc_raise_bc_exponent(bc_num base, bc_num expo, bc_num *result, size_t scale)
104-
{
106+
void bc_raise_bc_exponent(bc_num base, bc_num expo, bc_num *result, size_t scale) {
105107
/* Exponent must not have fractional part */
106108
assert(expo->n_scale == 0);
107109

ext/bcmath/libbcmath/src/recmul.c

Lines changed: 123 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,64 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len,
7272
}
7373
}
7474

75+
/*
76+
* Equivalent of bc_fast_mul for small numbers to perform computations
77+
* without using array.
78+
*/
79+
static inline void bc_fast_square(bc_num n1, size_t n1len, bc_num *prod)
80+
{
81+
const char *n1end = n1->n_value + n1len - 1;
82+
83+
BC_VECTOR n1_vector = bc_partial_convert_to_vector(n1end, n1len);
84+
BC_VECTOR prod_vector = n1_vector * n1_vector;
85+
86+
size_t prodlen = n1len + n1len;
87+
*prod = bc_new_num_nonzeroed(prodlen, 0);
88+
char *pptr = (*prod)->n_value;
89+
char *pend = pptr + prodlen - 1;
90+
91+
while (pend >= pptr) {
92+
*pend-- = prod_vector % BASE;
93+
prod_vector /= BASE;
94+
}
95+
}
96+
97+
/* Common part of functions bc_standard_mul and bc_standard_square
98+
* that takes a vector and converts it to a bc_num */
99+
static inline void bc_mul_finish_from_vector(BC_VECTOR *prod_vector, size_t prod_arr_size, size_t prodlen, bc_num *prod) {
100+
/*
101+
* Move a value exceeding 4/8 digits by carrying to the next digit.
102+
* However, the last digit does nothing.
103+
*/
104+
bc_mul_carry_calc(prod_vector, prod_arr_size);
105+
106+
/* Convert to bc_num */
107+
*prod = bc_new_num_nonzeroed(prodlen, 0);
108+
char *pptr = (*prod)->n_value;
109+
char *pend = pptr + prodlen - 1;
110+
size_t i = 0;
111+
while (i < prod_arr_size - 1) {
112+
#if BC_VECTOR_SIZE == 4
113+
bc_write_bcd_representation(prod_vector[i], pend - 3);
114+
pend -= 4;
115+
#else
116+
bc_write_bcd_representation(prod_vector[i] / 10000, pend - 7);
117+
bc_write_bcd_representation(prod_vector[i] % 10000, pend - 3);
118+
pend -= 8;
119+
#endif
120+
i++;
121+
}
122+
123+
/*
124+
* The last digit may carry over.
125+
* Also need to fill it to the end with zeros, so loop until the end of the string.
126+
*/
127+
while (pend >= pptr) {
128+
*pend-- = prod_vector[i] % BASE;
129+
prod_vector[i] /= BASE;
130+
}
131+
}
132+
75133
/*
76134
* Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_VECTOR.
77135
* The array is generated starting with the smaller digits.
@@ -128,42 +186,57 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc
128186
}
129187
}
130188

131-
/*
132-
* Move a value exceeding 4/8 digits by carrying to the next digit.
133-
* However, the last digit does nothing.
134-
*/
135-
bc_mul_carry_calc(prod_vector, prod_arr_size);
189+
bc_mul_finish_from_vector(prod_vector, prod_arr_size, prodlen, prod);
136190

137-
/* Convert to bc_num */
138-
*prod = bc_new_num_nonzeroed(prodlen, 0);
139-
char *pptr = (*prod)->n_value;
140-
char *pend = pptr + prodlen - 1;
141-
i = 0;
142-
while (i < prod_arr_size - 1) {
143-
#if BC_VECTOR_SIZE == 4
144-
bc_write_bcd_representation(prod_vector[i], pend - 3);
145-
pend -= 4;
146-
#else
147-
bc_write_bcd_representation(prod_vector[i] / 10000, pend - 7);
148-
bc_write_bcd_representation(prod_vector[i] % 10000, pend - 3);
149-
pend -= 8;
150-
#endif
151-
i++;
191+
efree(buf);
192+
}
193+
194+
/** This is bc_standard_mul implementation for square */
195+
static void bc_standard_square(bc_num n1, size_t n1len, bc_num *prod)
196+
{
197+
size_t i;
198+
const char *n1end = n1->n_value + n1len - 1;
199+
size_t prodlen = n1len + n1len;
200+
201+
size_t n1_arr_size = (n1len + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
202+
size_t prod_arr_size = (prodlen + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
203+
204+
BC_VECTOR *buf = safe_emalloc(n1_arr_size + n1_arr_size + prod_arr_size, sizeof(BC_VECTOR), 0);
205+
206+
BC_VECTOR *n1_vector = buf;
207+
BC_VECTOR *prod_vector = n1_vector + n1_arr_size + n1_arr_size;
208+
209+
for (i = 0; i < prod_arr_size; i++) {
210+
prod_vector[i] = 0;
152211
}
153212

154-
/*
155-
* The last digit may carry over.
156-
* Also need to fill it to the end with zeros, so loop until the end of the string.
157-
*/
158-
while (pend >= pptr) {
159-
*pend-- = prod_vector[i] % BASE;
160-
prod_vector[i] /= BASE;
213+
/* Convert to BC_VECTOR[] */
214+
bc_convert_to_vector(n1_vector, n1end, n1len);
215+
216+
/* Multiplication and addition */
217+
size_t count = 0;
218+
for (i = 0; i < n1_arr_size; i++) {
219+
/*
220+
* This calculation adds the result multiple times to the array entries.
221+
* When multiplying large numbers of digits, there is a possibility of
222+
* overflow, so digit adjustment is performed beforehand.
223+
*/
224+
if (UNEXPECTED(count >= BC_VECTOR_NO_OVERFLOW_ADD_COUNT)) {
225+
bc_mul_carry_calc(prod_vector, prod_arr_size);
226+
count = 0;
227+
}
228+
count++;
229+
for (size_t j = 0; j < n1_arr_size; j++) {
230+
prod_vector[i + j] += n1_vector[i] * n1_vector[j];
231+
}
161232
}
162233

234+
bc_mul_finish_from_vector(prod_vector, prod_arr_size, prodlen, prod);
235+
163236
efree(buf);
164237
}
165238

166-
/* The multiply routine. N2 times N1 is put int PROD with the scale of
239+
/* The multiply routine. N2 times N1 is put int PROD with the scale of
167240
the result being MIN(N2 scale+N1 scale, MAX (SCALE, N2 scale, N1 scale)).
168241
*/
169242

@@ -194,3 +267,25 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale)
194267
}
195268
return prod;
196269
}
270+
271+
bc_num bc_square(bc_num n1, size_t scale)
272+
{
273+
bc_num prod;
274+
275+
size_t len1 = n1->n_len + n1->n_scale;
276+
size_t full_scale = n1->n_scale + n1->n_scale;
277+
size_t prod_scale = MIN(full_scale, MAX(scale, n1->n_scale));
278+
279+
if (len1 <= BC_VECTOR_SIZE) {
280+
bc_fast_square(n1, len1, &prod);
281+
} else {
282+
bc_standard_square(n1, len1, &prod);
283+
}
284+
285+
prod->n_sign = PLUS;
286+
prod->n_len -= full_scale;
287+
prod->n_scale = prod_scale;
288+
_bc_rm_leading_zeros(prod);
289+
290+
return prod;
291+
}

0 commit comments

Comments
 (0)