Skip to content

Commit b5653e8

Browse files
committed
Changed the bcmul calculation method
Multiplication is performed after converting to unsigned long, resulting in faster calculations.
1 parent 7203ca8 commit b5653e8

File tree

2 files changed

+120
-197
lines changed

2 files changed

+120
-197
lines changed

ext/bcmath/libbcmath/src/private.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,13 @@ static inline uint64_t BC_BSWAP64(uint64_t u)
8484
#if SIZEOF_SIZE_T >= 8
8585
# define BC_BSWAP(u) BC_BSWAP64(u)
8686
# define BC_UINT_T uint64_t
87+
# define BC_LONGABLE_DIGITS 8
88+
# define BC_LONGABLE_OVERFLOW 100000000
8789
#else
8890
# define BC_BSWAP(u) BC_BSWAP32(u)
8991
# define BC_UINT_T uint32_t
92+
# define BC_LONGABLE_DIGITS 4
93+
# define BC_LONGABLE_OVERFLOW 10000
9094
#endif
9195

9296
#ifdef WORDS_BIGENDIAN

ext/bcmath/libbcmath/src/recmul.c

Lines changed: 116 additions & 197 deletions
Original file line numberDiff line numberDiff line change
@@ -36,217 +36,134 @@
3636
#include "private.h" /* For _bc_rm_leading_zeros() */
3737
#include "zend_alloc.h"
3838

39-
/* Recursive vs non-recursive multiply crossover ranges. */
40-
#if defined(MULDIGITS)
41-
#include "muldigits.h"
42-
#else
43-
#define MUL_BASE_DIGITS 80
44-
#endif
45-
46-
int mul_base_digits = MUL_BASE_DIGITS;
47-
#define MUL_SMALL_DIGITS mul_base_digits/4
4839

4940
/* Multiply utility routines */
5041

51-
static bc_num new_sub_num(size_t length, size_t scale, char *value)
42+
/*
43+
* Converts BCD to long, going backwards from pointer n by the number of
44+
* characters specified by len.
45+
*/
46+
static inline unsigned long bc_partial_convert_to_long(const char *n, size_t len)
5247
{
53-
bc_num temp = (bc_num) emalloc(sizeof(bc_struct));
48+
unsigned long num = 0;
49+
unsigned long base = 1;
5450

55-
temp->n_sign = PLUS;
56-
temp->n_len = length;
57-
temp->n_scale = scale;
58-
temp->n_refs = 1;
59-
temp->n_value = value;
60-
return temp;
51+
for (size_t i = 0; i < len; i++) {
52+
num += *n * base;
53+
base *= BASE;
54+
n--;
55+
}
56+
57+
return num;
6158
}
6259

63-
static void _bc_simp_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod)
60+
/*
61+
* If the n_values ​​of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less,
62+
* the calculation will be performed at high speed without using an array.
63+
*/
64+
static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod)
6465
{
65-
char *n1ptr, *n2ptr, *pvptr;
66-
char *n1end, *n2end; /* To the end of n1 and n2. */
67-
int sum = 0;
66+
char *n1end = n1->n_value + n1len - 1;
67+
char *n2end = n2->n_value + n2len - 1;
6868

69-
int prodlen = n1len + n2len + 1;
69+
unsigned long n1_l = bc_partial_convert_to_long(n1end, n1len);
70+
unsigned long n2_l = bc_partial_convert_to_long(n2end, n2len);
71+
unsigned long prod_l = n1_l * n2_l;
7072

73+
size_t prodlen = n1len + n2len;
7174
*prod = bc_new_num_nonzeroed(prodlen, 0);
75+
char *pptr = (*prod)->n_value;
76+
char *pend = pptr + prodlen - 1;
7277

73-
n1end = (char *) (n1->n_value + n1len - 1);
74-
n2end = (char *) (n2->n_value + n2len - 1);
75-
pvptr = (char *) ((*prod)->n_value + prodlen - 1);
76-
77-
/* Here is the loop... */
78-
for (int index = 0; index < prodlen - 1; index++) {
79-
n1ptr = (char *) (n1end - MAX(0, index - n2len + 1));
80-
n2ptr = (char *) (n2end - MIN(index, n2len - 1));
81-
while ((n1ptr >= n1->n_value) && (n2ptr <= n2end)) {
82-
sum += *n1ptr * *n2ptr;
83-
n1ptr--;
84-
n2ptr++;
85-
}
86-
*pvptr-- = sum % BASE;
87-
sum = sum / BASE;
78+
while (pend >= pptr) {
79+
*pend-- = prod_l % BASE;
80+
prod_l /= BASE;
8881
}
89-
*pvptr = sum;
9082
}
9183

92-
93-
/* A special adder/subtractor for the recursive divide and conquer
94-
multiply algorithm. Note: if sub is called, accum must
95-
be larger that what is being subtracted. Also, accum and val
96-
must have n_scale = 0. (e.g. they must look like integers. *) */
97-
static void _bc_shift_addsub(bc_num accum, bc_num val, int shift, bool sub)
84+
/*
85+
* Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of unsigned longs.
86+
* The array is generated starting with the smaller digits.
87+
* e.g. 12345678901234567890 => {34567890, 56789012, 1234}
88+
*
89+
* Multiply and add these groups of numbers to perform multiplication fast.
90+
* How much to shift the digits when adding values ​​can be calculated from the index of the array.
91+
*/
92+
static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, int n2len, bc_num *prod)
9893
{
99-
signed char *accp, *valp;
100-
unsigned int carry = 0;
101-
size_t count = val->n_len;
102-
103-
if (val->n_value[0] == 0) {
104-
count--;
94+
size_t i;
95+
char *n1end = n1->n_value + n1len - 1;
96+
char *n2end = n2->n_value + n2len - 1;
97+
size_t prodlen = n1len + n2len;
98+
99+
size_t n1_arr_size = n1len / BC_LONGABLE_DIGITS + (n1len % BC_LONGABLE_DIGITS ? 1 : 0);
100+
size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0);
101+
size_t prod_arr_size = n1_arr_size + n2_arr_size - 1;
102+
103+
unsigned long n1_l[n1_arr_size];
104+
unsigned long n2_l[n2_arr_size];
105+
unsigned long prod_l[prod_arr_size];
106+
for (i = 0; i < prod_arr_size; i++) {
107+
prod_l[i] = 0;
105108
}
106-
assert(accum->n_len + accum->n_scale >= shift + count);
107-
108-
/* Set up pointers and others */
109-
accp = (signed char *) (accum->n_value + accum->n_len + accum->n_scale - shift - 1);
110-
valp = (signed char *) (val->n_value + val->n_len - 1);
111-
112-
if (sub) {
113-
/* Subtraction, carry is really borrow. */
114-
while (count--) {
115-
*accp -= *valp-- + carry;
116-
if (*accp < 0) {
117-
carry = 1;
118-
*accp-- += BASE;
119-
} else {
120-
carry = 0;
121-
accp--;
122-
}
123-
}
124-
while (carry) {
125-
*accp -= carry;
126-
if (*accp < 0) {
127-
*accp-- += BASE;
128-
} else {
129-
carry = 0;
130-
}
131-
}
132-
} else {
133-
/* Addition */
134-
while (count--) {
135-
*accp += *valp-- + carry;
136-
if (*accp > (BASE - 1)) {
137-
carry = 1;
138-
*accp-- -= BASE;
139-
} else {
140-
carry = 0;
141-
accp--;
142-
}
143-
}
144-
while (carry) {
145-
*accp += carry;
146-
if (*accp > (BASE - 1)) {
147-
*accp-- -= BASE;
148-
} else {
149-
carry = 0;
150-
}
151-
}
152-
}
153-
}
154-
155-
/* Recursive divide and conquer multiply algorithm.
156-
Based on
157-
Let u = u0 + u1*(b^n)
158-
Let v = v0 + v1*(b^n)
159-
Then uv = (B^2n+B^n)*u1*v1 + B^n*(u1-u0)*(v0-v1) + (B^n+1)*u0*v0
160109

161-
B is the base of storage, number of digits in u1,u0 close to equal.
162-
*/
163-
static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *prod)
164-
{
165-
bc_num u0, u1, v0, v1;
166-
bc_num m1, m2, m3;
167-
size_t n;
168-
bool m1zero;
169-
170-
/* Base case? */
171-
if ((ulen + vlen) < mul_base_digits
172-
|| ulen < MUL_SMALL_DIGITS
173-
|| vlen < MUL_SMALL_DIGITS
174-
) {
175-
_bc_simp_mul(u, ulen, v, vlen, prod);
176-
return;
110+
/* Convert n1 to long[] */
111+
i = 0;
112+
while (n1len > 0) {
113+
size_t len = MIN(BC_LONGABLE_DIGITS, n1len);
114+
n1_l[i] = bc_partial_convert_to_long(n1end, len);
115+
n1end -= len;
116+
n1len -= len;
117+
i++;
177118
}
178119

179-
/* Calculate n -- the u and v split point in digits. */
180-
n = (MAX(ulen, vlen) + 1) / 2;
181-
182-
/* Split u and v. */
183-
if (ulen < n) {
184-
u1 = bc_copy_num(BCG(_zero_));
185-
u0 = new_sub_num(ulen, 0, u->n_value);
186-
} else {
187-
u1 = new_sub_num(ulen - n, 0, u->n_value);
188-
u0 = new_sub_num(n, 0, u->n_value + ulen - n);
189-
}
190-
if (vlen < n) {
191-
v1 = bc_copy_num(BCG(_zero_));
192-
v0 = new_sub_num(vlen, 0, v->n_value);
193-
} else {
194-
v1 = new_sub_num(vlen - n, 0, v->n_value);
195-
v0 = new_sub_num(n, 0, v->n_value + vlen - n);
120+
/* Convert n2 to long[] */
121+
i = 0;
122+
while (n2len > 0) {
123+
size_t len = MIN(BC_LONGABLE_DIGITS, n2len);
124+
n2_l[i] = bc_partial_convert_to_long(n2end, len);
125+
n2end -= len;
126+
n2len -= len;
127+
i++;
196128
}
197-
_bc_rm_leading_zeros(u1);
198-
_bc_rm_leading_zeros(u0);
199-
_bc_rm_leading_zeros(v1);
200-
_bc_rm_leading_zeros(v0);
201-
202-
m1zero = bc_is_zero(u1) || bc_is_zero(v1);
203-
204-
/* Calculate sub results ... */
205-
206-
bc_num d1 = bc_sub(u1, u0, 0);
207-
bc_num d2 = bc_sub(v0, v1, 0);
208-
209129

210-
/* Do recursive multiplies and shifted adds. */
211-
if (m1zero) {
212-
m1 = bc_copy_num(BCG(_zero_));
213-
} else {
214-
_bc_rec_mul(u1, u1->n_len, v1, v1->n_len, &m1);
130+
/* Multiplication and addition */
131+
for (i = 0; i < n1_arr_size; i++) {
132+
for (size_t j = 0; j < n2_arr_size; j++) {
133+
prod_l[i + j] += n1_l[i] * n2_l[j];
134+
}
215135
}
216136

217-
if (bc_is_zero(d1) || bc_is_zero(d2)) {
218-
m2 = bc_copy_num(BCG(_zero_));
219-
} else {
220-
_bc_rec_mul(d1, d1->n_len, d2, d2->n_len, &m2);
137+
/*
138+
* Move a value exceeding 8 digits by carrying to the next digit.
139+
* However, the last digit does nothing.
140+
*/
141+
for (i = 0; i < prod_arr_size - 1; i++) {
142+
prod_l[i + 1] += prod_l[i] / BC_LONGABLE_OVERFLOW;
143+
prod_l[i] %= BC_LONGABLE_OVERFLOW;
221144
}
222145

223-
if (bc_is_zero(u0) || bc_is_zero(v0)) {
224-
m3 = bc_copy_num(BCG(_zero_));
225-
} else {
226-
_bc_rec_mul(u0, u0->n_len, v0, v0->n_len, &m3);
146+
/* Convert to bc_num */
147+
*prod = bc_new_num_nonzeroed(prodlen, 0);
148+
char *pptr = (*prod)->n_value;
149+
char *pend = pptr + prodlen - 1;
150+
i = 0;
151+
while (i < prod_arr_size - 1) {
152+
for (size_t j = 0; j < BC_LONGABLE_DIGITS; j++) {
153+
*pend-- = prod_l[i] % BASE;
154+
prod_l[i] /= BASE;
155+
}
156+
i++;
227157
}
228158

229-
/* Initialize product */
230-
*prod = bc_new_num(ulen + vlen + 1, 0);
231-
232-
if (!m1zero) {
233-
_bc_shift_addsub(*prod, m1, 2 * n, false);
234-
_bc_shift_addsub(*prod, m1, n, false);
159+
/*
160+
* The last digit may carry over.
161+
* Also need to fill it to the end with zeros, so loop until the end of the string.
162+
*/
163+
while (pend >= pptr) {
164+
*pend-- = prod_l[i] % BASE;
165+
prod_l[i] /= BASE;
235166
}
236-
_bc_shift_addsub(*prod, m3, n, false);
237-
_bc_shift_addsub(*prod, m3, 0, false);
238-
_bc_shift_addsub(*prod, m2, n, d1->n_sign != d2->n_sign);
239-
240-
/* Now clean up! */
241-
bc_free_num (&u1);
242-
bc_free_num (&u0);
243-
bc_free_num (&v1);
244-
bc_free_num (&m1);
245-
bc_free_num (&v0);
246-
bc_free_num (&m2);
247-
bc_free_num (&m3);
248-
bc_free_num (&d1);
249-
bc_free_num (&d2);
250167
}
251168

252169
/* The multiply routine. N2 times N1 is put int PROD with the scale of
@@ -255,26 +172,28 @@ static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *pr
255172

256173
bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale)
257174
{
258-
bc_num pval;
259-
size_t len1, len2;
260-
size_t full_scale, prod_scale;
175+
bc_num prod;
261176

262177
/* Initialize things. */
263-
len1 = n1->n_len + n1->n_scale;
264-
len2 = n2->n_len + n2->n_scale;
265-
full_scale = n1->n_scale + n2->n_scale;
266-
prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale)));
178+
size_t len1 = n1->n_len + n1->n_scale;
179+
size_t len2 = n2->n_len + n2->n_scale;
180+
size_t full_scale = n1->n_scale + n2->n_scale;
181+
size_t prod_scale = MIN(full_scale, MAX(scale, MAX(n1->n_scale, n2->n_scale)));
267182

268183
/* Do the multiply */
269-
_bc_rec_mul(n1, len1, n2, len2, &pval);
184+
if (len1 <= BC_LONGABLE_DIGITS && len2 <= BC_LONGABLE_DIGITS) {
185+
bc_fast_mul(n1, len1, n2, len2, &prod);
186+
} else {
187+
bc_standard_mul(n1, len1, n2, len2, &prod);
188+
}
270189

271190
/* Assign to prod and clean up the number. */
272-
pval->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS);
273-
pval->n_len = len2 + len1 + 1 - full_scale;
274-
pval->n_scale = prod_scale;
275-
_bc_rm_leading_zeros(pval);
276-
if (bc_is_zero(pval)) {
277-
pval->n_sign = PLUS;
191+
prod->n_sign = (n1->n_sign == n2->n_sign ? PLUS : MINUS);
192+
prod->n_len -= full_scale;
193+
prod->n_scale = prod_scale;
194+
_bc_rm_leading_zeros(prod);
195+
if (bc_is_zero(prod)) {
196+
prod->n_sign = PLUS;
278197
}
279-
return pval;
198+
return prod;
280199
}

0 commit comments

Comments
 (0)