@@ -124,27 +124,26 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
124
124
bc_num _bc_do_sub (bc_num n1 , bc_num n2 , size_t scale_min )
125
125
{
126
126
bc_num diff ;
127
- size_t diff_scale , diff_len ;
128
- size_t min_scale , min_len ;
129
- size_t borrow , count ;
127
+ /* The caller guarantees that n1 is always the larger number. */
128
+ size_t diff_len = EXPECTED (n1 -> n_len >= n2 -> n_len ) ? n1 -> n_len : n2 -> n_len ;
129
+ size_t diff_scale = MAX (n1 -> n_scale , n2 -> n_scale );
130
+ /* Same condition as EXPECTED before, but using EXPECTED again will make it slower. */
131
+ size_t min_len = n1 -> n_len >= n2 -> n_len ? n2 -> n_len : n1 -> n_len ;
132
+ size_t min_scale = MIN (n1 -> n_scale , n2 -> n_scale );
133
+ size_t min_bytes = min_len + min_scale ;
134
+ size_t borrow = 0 ;
135
+ size_t count ;
130
136
int val ;
131
137
char * n1ptr , * n2ptr , * diffptr ;
132
138
133
139
/* Allocate temporary storage. */
134
- diff_len = MAX (n1 -> n_len , n2 -> n_len );
135
- diff_scale = MAX (n1 -> n_scale , n2 -> n_scale );
136
- min_len = MIN (n1 -> n_len , n2 -> n_len );
137
- min_scale = MIN (n1 -> n_scale , n2 -> n_scale );
138
140
diff = bc_new_num (diff_len , MAX (diff_scale , scale_min ));
139
141
140
142
/* Initialize the subtract. */
141
143
n1ptr = (char * ) (n1 -> n_value + n1 -> n_len + n1 -> n_scale - 1 );
142
144
n2ptr = (char * ) (n2 -> n_value + n2 -> n_len + n2 -> n_scale - 1 );
143
145
diffptr = (char * ) (diff -> n_value + diff_len + diff_scale - 1 );
144
146
145
- /* Subtract the numbers. */
146
- borrow = 0 ;
147
-
148
147
/* Take care of the longer scaled number. */
149
148
if (n1 -> n_scale != min_scale ) {
150
149
/* n1 has the longer scale */
@@ -166,7 +165,59 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
166
165
}
167
166
168
167
/* Now do the equal length scale and integer parts. */
169
- for (count = 0 ; count < min_len + min_scale ; count ++ ) {
168
+ count = 0 ;
169
+ /* Uses SWAR (SIMD within a register) to process sizeof(BC_UINT_T) digits per iteration. */
170
+ if (min_bytes >= sizeof (BC_UINT_T )) {
171
+ diffptr ++ ;
172
+ n1ptr ++ ;
173
+ n2ptr ++ ;
174
+ while (count + sizeof (BC_UINT_T ) <= min_bytes ) {
175
+ diffptr -= sizeof (BC_UINT_T );
176
+ n1ptr -= sizeof (BC_UINT_T );
177
+ n2ptr -= sizeof (BC_UINT_T );
178
+
179
+ BC_UINT_T n1bytes ;
180
+ BC_UINT_T n2bytes ;
181
+ memcpy (& n1bytes , n1ptr , sizeof (n1bytes ));
182
+ memcpy (& n2bytes , n2ptr , sizeof (n2bytes ));
183
+
184
+ #if BC_LITTLE_ENDIAN
185
+ /* Little endian requires changing the order of bytes. */
186
+ n1bytes = BC_BSWAP (n1bytes );
187
+ n2bytes = BC_BSWAP (n2bytes );
188
+ #endif
189
+
190
+ n1bytes -= n2bytes + borrow ;
191
+ /* If the most significant bit is 1, the whole word has borrowed from the next chunk. */
192
+ bool tmp_borrow = n1bytes & ((BC_UINT_T ) 1 << (8 * sizeof (BC_UINT_T ) - 1 ));
193
+
194
+ /*
195
+ * Check the most significant bit of each byte; if it is 1, that byte has borrowed from the
195
+ * next higher byte. Because a byte wraps around modulo 256 rather than modulo 10, a byte
196
+ * that borrowed ends up with an extra 6 in its lower 4 bits.
197
+ * Therefore, clear the upper 4 bits of every byte and, for each byte that borrowed, subtract
198
+ * 6 from the lower 4 bits to adjust it to the correct value as a decimal digit.
200
+ */
201
+ BC_UINT_T borrow_mask = ((n1bytes & SWAR_REPEAT (0x80 )) >> 7 ) * 0x06 ;
202
+ n1bytes = (n1bytes & SWAR_REPEAT (0x0F )) - borrow_mask ;
203
+
204
+ #if BC_LITTLE_ENDIAN
205
+ /* Little endian requires changing the order of bytes back. */
206
+ n1bytes = BC_BSWAP (n1bytes );
207
+ #endif
208
+
209
+ memcpy (diffptr , & n1bytes , sizeof (n1bytes ));
210
+
211
+ borrow = tmp_borrow ;
212
+ count += sizeof (BC_UINT_T );
213
+ }
214
+ diffptr -- ;
215
+ n1ptr -- ;
216
+ n2ptr -- ;
217
+ }
218
+
219
+ /* Calculate the remaining bytes that are less than the size of BC_UINT_T using a normal loop. */
220
+ for (; count < min_bytes ; count ++ ) {
170
221
val = * n1ptr -- - * n2ptr -- - borrow ;
171
222
if (val < 0 ) {
172
223
val += BASE ;
0 commit comments