36
36
#include "private.h" /* For _bc_rm_leading_zeros() */
37
37
#include "zend_alloc.h"
38
38
39
- /* Recursive vs non-recursive multiply crossover ranges. */
40
- #if defined(MULDIGITS )
41
- #include "muldigits.h"
42
- #else
43
- #define MUL_BASE_DIGITS 80
44
- #endif
45
-
46
- int mul_base_digits = MUL_BASE_DIGITS ;
47
- #define MUL_SMALL_DIGITS mul_base_digits/4
48
39
49
40
/* Multiply utility routines */
50
41
51
- static bc_num new_sub_num (size_t length , size_t scale , char * value )
42
+ /*
43
+ * Converts BCD to long, going backwards from pointer n by the number of
44
+ * characters specified by len.
45
+ */
46
+ static inline unsigned long bc_partial_convert_to_long (const char * n , size_t len )
52
47
{
53
- bc_num temp = (bc_num ) emalloc (sizeof (bc_struct ));
48
+ unsigned long num = 0 ;
49
+ unsigned long base = 1 ;
54
50
55
- temp -> n_sign = PLUS ;
56
- temp -> n_len = length ;
57
- temp -> n_scale = scale ;
58
- temp -> n_refs = 1 ;
59
- temp -> n_value = value ;
60
- return temp ;
51
+ for (size_t i = 0 ; i < len ; i ++ ) {
52
+ num += * n * base ;
53
+ base *= BASE ;
54
+ n -- ;
55
+ }
56
+
57
+ return num ;
61
58
}
62
59
63
- static void _bc_simp_mul (bc_num n1 , size_t n1len , bc_num n2 , int n2len , bc_num * prod )
60
+ /*
61
+ * If the n_values of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less,
62
+ * the calculation will be performed at high speed without using an array.
63
+ */
64
+ static inline void bc_fast_mul (bc_num n1 , size_t n1len , bc_num n2 , int n2len , bc_num * prod )
64
65
{
65
- char * n1ptr , * n2ptr , * pvptr ;
66
- char * n1end , * n2end ; /* To the end of n1 and n2. */
67
- int sum = 0 ;
66
+ char * n1end = n1 -> n_value + n1len - 1 ;
67
+ char * n2end = n2 -> n_value + n2len - 1 ;
68
68
69
- int prodlen = n1len + n2len + 1 ;
69
+ unsigned long n1_l = bc_partial_convert_to_long (n1end , n1len );
70
+ unsigned long n2_l = bc_partial_convert_to_long (n2end , n2len );
71
+ unsigned long prod_l = n1_l * n2_l ;
70
72
73
+ size_t prodlen = n1len + n2len ;
71
74
* prod = bc_new_num_nonzeroed (prodlen , 0 );
75
+ char * pptr = (* prod )-> n_value ;
76
+ char * pend = pptr + prodlen - 1 ;
72
77
73
- n1end = (char * ) (n1 -> n_value + n1len - 1 );
74
- n2end = (char * ) (n2 -> n_value + n2len - 1 );
75
- pvptr = (char * ) ((* prod )-> n_value + prodlen - 1 );
76
-
77
- /* Here is the loop... */
78
- for (int index = 0 ; index < prodlen - 1 ; index ++ ) {
79
- n1ptr = (char * ) (n1end - MAX (0 , index - n2len + 1 ));
80
- n2ptr = (char * ) (n2end - MIN (index , n2len - 1 ));
81
- while ((n1ptr >= n1 -> n_value ) && (n2ptr <= n2end )) {
82
- sum += * n1ptr * * n2ptr ;
83
- n1ptr -- ;
84
- n2ptr ++ ;
85
- }
86
- * pvptr -- = sum % BASE ;
87
- sum = sum / BASE ;
78
+ while (pend >= pptr ) {
79
+ * pend -- = prod_l % BASE ;
80
+ prod_l /= BASE ;
88
81
}
89
- * pvptr = sum ;
90
82
}
91
83
92
-
93
- /* A special adder/subtractor for the recursive divide and conquer
94
- multiply algorithm. Note: if sub is called, accum must
95
- be larger that what is being subtracted. Also, accum and val
96
- must have n_scale = 0. (e.g. they must look like integers. *) */
97
- static void _bc_shift_addsub (bc_num accum , bc_num val , int shift , bool sub )
84
+ /*
85
+ * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of unsigned longs.
86
+ * The array is generated starting with the smaller digits.
87
+ * e.g. 12345678901234567890 => {34567890, 56789012, 1234}
88
+ *
89
+ * Multiply and add these groups of numbers to perform multiplication fast.
90
+ * How much to shift the digits when adding values can be calculated from the index of the array.
91
+ */
92
+ static void bc_standard_mul (bc_num n1 , size_t n1len , bc_num n2 , int n2len , bc_num * prod )
98
93
{
99
- signed char * accp , * valp ;
100
- unsigned int carry = 0 ;
101
- size_t count = val -> n_len ;
102
-
103
- if (val -> n_value [0 ] == 0 ) {
104
- count -- ;
94
+ size_t i ;
95
+ char * n1end = n1 -> n_value + n1len - 1 ;
96
+ char * n2end = n2 -> n_value + n2len - 1 ;
97
+ size_t prodlen = n1len + n2len ;
98
+
99
+ size_t n1_arr_size = n1len / BC_LONGABLE_DIGITS + (n1len % BC_LONGABLE_DIGITS ? 1 : 0 );
100
+ size_t n2_arr_size = n2len / BC_LONGABLE_DIGITS + (n2len % BC_LONGABLE_DIGITS ? 1 : 0 );
101
+ size_t prod_arr_size = n1_arr_size + n2_arr_size - 1 ;
102
+
103
+ unsigned long n1_l [n1_arr_size ];
104
+ unsigned long n2_l [n2_arr_size ];
105
+ unsigned long prod_l [prod_arr_size ];
106
+ for (i = 0 ; i < prod_arr_size ; i ++ ) {
107
+ prod_l [i ] = 0 ;
105
108
}
106
- assert (accum -> n_len + accum -> n_scale >= shift + count );
107
-
108
- /* Set up pointers and others */
109
- accp = (signed char * ) (accum -> n_value + accum -> n_len + accum -> n_scale - shift - 1 );
110
- valp = (signed char * ) (val -> n_value + val -> n_len - 1 );
111
-
112
- if (sub ) {
113
- /* Subtraction, carry is really borrow. */
114
- while (count -- ) {
115
- * accp -= * valp -- + carry ;
116
- if (* accp < 0 ) {
117
- carry = 1 ;
118
- * accp -- += BASE ;
119
- } else {
120
- carry = 0 ;
121
- accp -- ;
122
- }
123
- }
124
- while (carry ) {
125
- * accp -= carry ;
126
- if (* accp < 0 ) {
127
- * accp -- += BASE ;
128
- } else {
129
- carry = 0 ;
130
- }
131
- }
132
- } else {
133
- /* Addition */
134
- while (count -- ) {
135
- * accp += * valp -- + carry ;
136
- if (* accp > (BASE - 1 )) {
137
- carry = 1 ;
138
- * accp -- -= BASE ;
139
- } else {
140
- carry = 0 ;
141
- accp -- ;
142
- }
143
- }
144
- while (carry ) {
145
- * accp += carry ;
146
- if (* accp > (BASE - 1 )) {
147
- * accp -- -= BASE ;
148
- } else {
149
- carry = 0 ;
150
- }
151
- }
152
- }
153
- }
154
-
155
- /* Recursive divide and conquer multiply algorithm.
156
- Based on
157
- Let u = u0 + u1*(b^n)
158
- Let v = v0 + v1*(b^n)
159
- Then uv = (B^2n+B^n)*u1*v1 + B^n*(u1-u0)*(v0-v1) + (B^n+1)*u0*v0
160
109
161
- B is the base of storage, number of digits in u1,u0 close to equal.
162
- */
163
- static void _bc_rec_mul (bc_num u , size_t ulen , bc_num v , size_t vlen , bc_num * prod )
164
- {
165
- bc_num u0 , u1 , v0 , v1 ;
166
- bc_num m1 , m2 , m3 ;
167
- size_t n ;
168
- bool m1zero ;
169
-
170
- /* Base case? */
171
- if ((ulen + vlen ) < mul_base_digits
172
- || ulen < MUL_SMALL_DIGITS
173
- || vlen < MUL_SMALL_DIGITS
174
- ) {
175
- _bc_simp_mul (u , ulen , v , vlen , prod );
176
- return ;
110
+ /* Convert n1 to long[] */
111
+ i = 0 ;
112
+ while (n1len > 0 ) {
113
+ size_t len = MIN (BC_LONGABLE_DIGITS , n1len );
114
+ n1_l [i ] = bc_partial_convert_to_long (n1end , len );
115
+ n1end -= len ;
116
+ n1len -= len ;
117
+ i ++ ;
177
118
}
178
119
179
- /* Calculate n -- the u and v split point in digits. */
180
- n = (MAX (ulen , vlen ) + 1 ) / 2 ;
181
-
182
- /* Split u and v. */
183
- if (ulen < n ) {
184
- u1 = bc_copy_num (BCG (_zero_ ));
185
- u0 = new_sub_num (ulen , 0 , u -> n_value );
186
- } else {
187
- u1 = new_sub_num (ulen - n , 0 , u -> n_value );
188
- u0 = new_sub_num (n , 0 , u -> n_value + ulen - n );
189
- }
190
- if (vlen < n ) {
191
- v1 = bc_copy_num (BCG (_zero_ ));
192
- v0 = new_sub_num (vlen , 0 , v -> n_value );
193
- } else {
194
- v1 = new_sub_num (vlen - n , 0 , v -> n_value );
195
- v0 = new_sub_num (n , 0 , v -> n_value + vlen - n );
120
+ /* Convert n2 to long[] */
121
+ i = 0 ;
122
+ while (n2len > 0 ) {
123
+ size_t len = MIN (BC_LONGABLE_DIGITS , n2len );
124
+ n2_l [i ] = bc_partial_convert_to_long (n2end , len );
125
+ n2end -= len ;
126
+ n2len -= len ;
127
+ i ++ ;
196
128
}
197
- _bc_rm_leading_zeros (u1 );
198
- _bc_rm_leading_zeros (u0 );
199
- _bc_rm_leading_zeros (v1 );
200
- _bc_rm_leading_zeros (v0 );
201
-
202
- m1zero = bc_is_zero (u1 ) || bc_is_zero (v1 );
203
-
204
- /* Calculate sub results ... */
205
-
206
- bc_num d1 = bc_sub (u1 , u0 , 0 );
207
- bc_num d2 = bc_sub (v0 , v1 , 0 );
208
-
209
129
210
- /* Do recursive multiplies and shifted adds. */
211
- if ( m1zero ) {
212
- m1 = bc_copy_num ( BCG ( _zero_ ));
213
- } else {
214
- _bc_rec_mul ( u1 , u1 -> n_len , v1 , v1 -> n_len , & m1 );
130
+ /* Multiplication and addition */
131
+ for ( i = 0 ; i < n1_arr_size ; i ++ ) {
132
+ for ( size_t j = 0 ; j < n2_arr_size ; j ++ ) {
133
+ prod_l [ i + j ] += n1_l [ i ] * n2_l [ j ];
134
+ }
215
135
}
216
136
217
- if (bc_is_zero (d1 ) || bc_is_zero (d2 )) {
218
- m2 = bc_copy_num (BCG (_zero_ ));
219
- } else {
220
- _bc_rec_mul (d1 , d1 -> n_len , d2 , d2 -> n_len , & m2 );
137
+ /*
138
+ * Move a value exceeding 8 digits by carrying to the next digit.
139
+ * However, the last digit does nothing.
140
+ */
141
+ for (i = 0 ; i < prod_arr_size - 1 ; i ++ ) {
142
+ prod_l [i + 1 ] += prod_l [i ] / BC_LONGABLE_OVERFLOW ;
143
+ prod_l [i ] %= BC_LONGABLE_OVERFLOW ;
221
144
}
222
145
223
- if (bc_is_zero (u0 ) || bc_is_zero (v0 )) {
224
- m3 = bc_copy_num (BCG (_zero_ ));
225
- } else {
226
- _bc_rec_mul (u0 , u0 -> n_len , v0 , v0 -> n_len , & m3 );
146
+ /* Convert to bc_num */
147
+ * prod = bc_new_num_nonzeroed (prodlen , 0 );
148
+ char * pptr = (* prod )-> n_value ;
149
+ char * pend = pptr + prodlen - 1 ;
150
+ i = 0 ;
151
+ while (i < prod_arr_size - 1 ) {
152
+ for (size_t j = 0 ; j < BC_LONGABLE_DIGITS ; j ++ ) {
153
+ * pend -- = prod_l [i ] % BASE ;
154
+ prod_l [i ] /= BASE ;
155
+ }
156
+ i ++ ;
227
157
}
228
158
229
- /* Initialize product */
230
- * prod = bc_new_num (ulen + vlen + 1 , 0 );
231
-
232
- if (!m1zero ) {
233
- _bc_shift_addsub (* prod , m1 , 2 * n , false);
234
- _bc_shift_addsub (* prod , m1 , n , false);
159
+ /*
160
+ * The last digit may carry over.
161
+ * Also need to fill it to the end with zeros, so loop until the end of the string.
162
+ */
163
+ while (pend >= pptr ) {
164
+ * pend -- = prod_l [i ] % BASE ;
165
+ prod_l [i ] /= BASE ;
235
166
}
236
- _bc_shift_addsub (* prod , m3 , n , false);
237
- _bc_shift_addsub (* prod , m3 , 0 , false);
238
- _bc_shift_addsub (* prod , m2 , n , d1 -> n_sign != d2 -> n_sign );
239
-
240
- /* Now clean up! */
241
- bc_free_num (& u1 );
242
- bc_free_num (& u0 );
243
- bc_free_num (& v1 );
244
- bc_free_num (& m1 );
245
- bc_free_num (& v0 );
246
- bc_free_num (& m2 );
247
- bc_free_num (& m3 );
248
- bc_free_num (& d1 );
249
- bc_free_num (& d2 );
250
167
}
251
168
252
169
/* The multiply routine. N2 times N1 is put int PROD with the scale of
@@ -255,26 +172,28 @@ static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *pr
255
172
256
173
bc_num bc_multiply (bc_num n1 , bc_num n2 , size_t scale )
257
174
{
258
- bc_num pval ;
259
- size_t len1 , len2 ;
260
- size_t full_scale , prod_scale ;
175
+ bc_num prod ;
261
176
262
177
/* Initialize things. */
263
- len1 = n1 -> n_len + n1 -> n_scale ;
264
- len2 = n2 -> n_len + n2 -> n_scale ;
265
- full_scale = n1 -> n_scale + n2 -> n_scale ;
266
- prod_scale = MIN (full_scale , MAX (scale , MAX (n1 -> n_scale , n2 -> n_scale )));
178
+ size_t len1 = n1 -> n_len + n1 -> n_scale ;
179
+ size_t len2 = n2 -> n_len + n2 -> n_scale ;
180
+ size_t full_scale = n1 -> n_scale + n2 -> n_scale ;
181
+ size_t prod_scale = MIN (full_scale , MAX (scale , MAX (n1 -> n_scale , n2 -> n_scale )));
267
182
268
183
/* Do the multiply */
269
- _bc_rec_mul (n1 , len1 , n2 , len2 , & pval );
184
+ if (len1 <= BC_LONGABLE_DIGITS && len2 <= BC_LONGABLE_DIGITS ) {
185
+ bc_fast_mul (n1 , len1 , n2 , len2 , & prod );
186
+ } else {
187
+ bc_standard_mul (n1 , len1 , n2 , len2 , & prod );
188
+ }
270
189
271
190
/* Assign to prod and clean up the number. */
272
- pval -> n_sign = (n1 -> n_sign == n2 -> n_sign ? PLUS : MINUS );
273
- pval -> n_len = len2 + len1 + 1 - full_scale ;
274
- pval -> n_scale = prod_scale ;
275
- _bc_rm_leading_zeros (pval );
276
- if (bc_is_zero (pval )) {
277
- pval -> n_sign = PLUS ;
191
+ prod -> n_sign = (n1 -> n_sign == n2 -> n_sign ? PLUS : MINUS );
192
+ prod -> n_len -= full_scale ;
193
+ prod -> n_scale = prod_scale ;
194
+ _bc_rm_leading_zeros (prod );
195
+ if (bc_is_zero (prod )) {
196
+ prod -> n_sign = PLUS ;
278
197
}
279
- return pval ;
198
+ return prod ;
280
199
}
0 commit comments