Skip to content

Commit 3c9ab6e

Browse files
committed
ext/bcmath: Moved macros and added a test
1 parent acd6ac3 commit 3c9ab6e

File tree

5 files changed

+256
-149
lines changed

5 files changed

+256
-149
lines changed

ext/bcmath/libbcmath/src/convert.c

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,101 @@ char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char
6161

6262
return dest;
6363
}
64+
65+
/* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html.
66+
* This function transforms AABBCCDD into 1000 * AA + 100 * BB + 10 * CC + DD,
67+
* with the caveat that all components must be in the interval [0, 25] to prevent overflow
68+
* due to the multiplication by power of 10 (10 * 25 = 250 is the largest multiple of 10 that fits in a byte).
69+
* The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper
70+
* due to its divide-and-conquer nature.
71+
*/
72+
#if SIZEOF_SIZE_T == 4
73+
/* 32-bit variant: reads sizeof(BC_VECTOR) bytes starting at str, each byte holding one
 * decimal digit in its low nibble, and returns the number they spell out.
 * str[0] is the most significant digit, e.g. the bytes {1, 2, 3, 4} yield 1234. */
BC_VECTOR bc_parse_chunk_chars(const char *str)
74+
{
75+
BC_VECTOR tmp;
76+
/* Load all digit bytes at once; the masks below assume the little-endian layout
 * (str[0] in the lowest byte), hence the byte swap on big-endian targets. */
memcpy(&tmp, str, sizeof(tmp));
77+
#if !BC_LITTLE_ENDIAN
78+
tmp = BC_BSWAP(tmp);
79+
#endif
80+
81+
/* Stage 1: merge adjacent digits so that each 16-bit lane holds 10 * first + second. */
BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8;
82+
BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10;
83+
84+
tmp = lower_digits + upper_digits;
85+
86+
/* Stage 2: merge the two 2-digit lanes: 100 * (first pair) + (second pair). */
lower_digits = (tmp & 0x00ff0000) >> 16;
87+
upper_digits = (tmp & 0x000000ff) * 100;
88+
89+
return lower_digits + upper_digits;
90+
}
91+
#elif SIZEOF_SIZE_T == 8
92+
/* 64-bit variant: reads sizeof(BC_VECTOR) bytes starting at str, each byte holding one
 * decimal digit in its low nibble, and returns the number they spell out.
 * str[0] is the most significant digit, e.g. the bytes {1, 2, 3, 4, 5, 6, 7, 8} yield 12345678. */
BC_VECTOR bc_parse_chunk_chars(const char *str)
93+
{
94+
BC_VECTOR tmp;
95+
/* Load all digit bytes at once; the masks below assume the little-endian layout
 * (str[0] in the lowest byte), hence the byte swap on big-endian targets. */
memcpy(&tmp, str, sizeof(tmp));
96+
#if !BC_LITTLE_ENDIAN
97+
tmp = BC_BSWAP(tmp);
98+
#endif
99+
100+
/* Stage 1: merge adjacent digits so that each 16-bit lane holds 10 * first + second. */
BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8;
101+
BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10;
102+
103+
tmp = lower_digits + upper_digits;
104+
105+
/* Stage 2: merge adjacent 2-digit lanes so that each 32-bit lane holds a 4-digit value. */
lower_digits = (tmp & 0x00ff000000ff0000) >> 16;
106+
upper_digits = (tmp & 0x000000ff000000ff) * 100;
107+
108+
tmp = lower_digits + upper_digits;
109+
110+
/* Stage 3: merge the two 4-digit lanes: 10000 * (first four digits) + (last four digits). */
lower_digits = (tmp & 0x0000ffff00000000) >> 32;
111+
upper_digits = (tmp & 0x000000000000ffff) * 10000;
112+
113+
return lower_digits + upper_digits;
114+
}
115+
#endif
116+
117+
/* Packs two decimal digits into one byte, one digit per nibble. A is the tens digit and
 * B the ones digit; which of them lands in the low nibble depends on the byte order. */
#if BC_LITTLE_ENDIAN
118+
# define BC_ENCODE_LUT(A, B) ((A) | (B) << 4)
119+
#else
120+
# define BC_ENCODE_LUT(A, B) ((B) | (A) << 4)
121+
#endif
122+
123+
/* Expands to the ten comma-separated entries _(A, 0) ... _(A, 9), i.e. one row of the table. */
#define LUT_ITERATE(_, A) \
124+
_(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9)
125+
126+
/* This LUT encodes the decimal representation of numbers 0-99
127+
* such that we can avoid taking modulos and divisions which would be slow.
 * Entry 10 * A + B holds BC_ENCODE_LUT(A, B). */
128+
static const unsigned char LUT[100] = {
129+
LUT_ITERATE(BC_ENCODE_LUT, 0),
130+
LUT_ITERATE(BC_ENCODE_LUT, 1),
131+
LUT_ITERATE(BC_ENCODE_LUT, 2),
132+
LUT_ITERATE(BC_ENCODE_LUT, 3),
133+
LUT_ITERATE(BC_ENCODE_LUT, 4),
134+
LUT_ITERATE(BC_ENCODE_LUT, 5),
135+
LUT_ITERATE(BC_ENCODE_LUT, 6),
136+
LUT_ITERATE(BC_ENCODE_LUT, 7),
137+
LUT_ITERATE(BC_ENCODE_LUT, 8),
138+
LUT_ITERATE(BC_ENCODE_LUT, 9),
139+
};
140+
141+
/* Spreads the two nibbles of a packed byte into separate bytes of a 16-bit value:
 * the low nibble stays in bits 0-3 and the high nibble moves to bits 8-11. */
static inline unsigned short bc_expand_lut(unsigned char c)
142+
{
143+
return (c & 0x0f) | (c & 0xf0) << 4;
144+
}
145+
146+
/* Writes the four decimal digits of value to str, one digit per byte, most significant first.
147+
* The bytes hold the raw digit values 0-9 (not ASCII characters) and no terminator is written.
 * E.g. if value = 1234, then the bytes {1, 2, 3, 4} will be written to str.
 * value must be below 10000, because value / 100 is used as an index into the 100-entry LUT. */
148+
void bc_write_bcd_representation(uint32_t value, char *str)
149+
{
150+
uint32_t upper = value / 100; /* e.g. 12 */
151+
uint32_t lower = value % 100; /* e.g. 34 */
152+
153+
/* Each LUT entry expands to two digit bytes; the two halves are ordered so that
 * the in-memory byte sequence is the same on either byte order. */
#if BC_LITTLE_ENDIAN
154+
/* Note: little endian, so `lower` comes before `upper`! */
155+
uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]);
156+
#else
157+
/* Note: big endian, so `upper` comes before `lower`! */
158+
uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]);
159+
#endif
160+
/* Always stores sizeof(digits) == 4 bytes at str. */
memcpy(str, &digits, sizeof(digits));
161+
}

ext/bcmath/libbcmath/src/convert.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,47 @@
1414
+----------------------------------------------------------------------+
1515
*/
1616

17+
#include "private.h"
18+
1719
#ifndef BCMATH_CONVERT_H
1820
#define BCMATH_CONVERT_H
1921

2022
char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char *source_end);
23+
void bc_write_bcd_representation(uint32_t value, char *str);
24+
BC_VECTOR bc_parse_chunk_chars(const char *str);
25+
26+
/*
27+
* Converts bc_num to BC_VECTOR, going backwards from pointer n by the number of
28+
* characters specified by len.
29+
*/
30+
static inline BC_VECTOR bc_partial_convert_to_vector(const char *n, size_t len)
31+
{
32+
/* Fast path for a full chunk: n points at the last digit, so the chunk
 * starts BC_VECTOR_SIZE - 1 bytes earlier. */
if (len == BC_VECTOR_SIZE) {
33+
return bc_parse_chunk_chars(n - BC_VECTOR_SIZE + 1);
34+
}
35+
36+
BC_VECTOR num = 0;
37+
BC_VECTOR base = 1;
38+
39+
/* Shorter chunk: accumulate digit by digit while walking backwards from n, scaling
 * each digit by an increasing power of BASE (defined elsewhere; presumably 10 - confirm). */
for (size_t i = 0; i < len; i++) {
40+
num += *n * base;
41+
base *= BASE;
42+
n--;
43+
}
44+
45+
return num;
46+
}
47+
48+
/*
 * Converts the nlen digits that end at nend into chunks of at most BC_VECTOR_SIZE digits,
 * walking backwards from nend. The chunks are stored in n_vector[0], n_vector[1], ... in
 * the order they are read, so n_vector[0] receives the chunk nearest to nend.
 * One element is written per chunk, i.e. nlen / BC_VECTOR_SIZE rounded up.
 */
static inline void bc_convert_to_vector(BC_VECTOR *n_vector, const char *nend, size_t nlen)
49+
{
50+
size_t i = 0;
51+
while (nlen > 0) {
52+
/* Only the final chunk can be shorter than BC_VECTOR_SIZE. */
size_t len = MIN(BC_VECTOR_SIZE, nlen);
53+
n_vector[i] = bc_partial_convert_to_vector(nend, len);
54+
nend -= len;
55+
nlen -= len;
56+
i++;
57+
}
58+
}
2159

2260
#endif

ext/bcmath/libbcmath/src/private.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,15 @@ static inline uint64_t BC_BSWAP64(uint64_t u)
8484
#if SIZEOF_SIZE_T >= 8
8585
# define BC_BSWAP(u) BC_BSWAP64(u)
8686
typedef uint64_t BC_VECTOR;
87+
# define BC_VECTOR_SIZE 8
88+
/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */
89+
# define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 100000000
8790
#else
8891
# define BC_BSWAP(u) BC_BSWAP32(u)
8992
typedef uint32_t BC_VECTOR;
93+
# define BC_VECTOR_SIZE 4
94+
/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */
95+
# define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 10000
9096
#endif
9197

9298
#ifdef WORDS_BIGENDIAN
@@ -95,6 +101,12 @@ static inline uint64_t BC_BSWAP64(uint64_t u)
95101
# define BC_LITTLE_ENDIAN 1
96102
#endif
97103

104+
/*
105+
* Adding more than this many times may cause uint32_t/uint64_t to overflow.
106+
* Typically this is 1844 for 64bit and 42 for 32bit.
107+
*/
108+
#define BC_VECTOR_NO_OVERFLOW_ADD_COUNT (~((BC_VECTOR) 0) / (BC_VECTOR_BOUNDARY_NUM * BC_VECTOR_BOUNDARY_NUM))
109+
98110

99111
/* routines */
100112
bcmath_compare_result _bc_do_compare (bc_num n1, bc_num n2, size_t scale, bool use_sign);

ext/bcmath/libbcmath/src/recmul.c

Lines changed: 1 addition & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -34,26 +34,10 @@
3434
#include <assert.h>
3535
#include <stdbool.h>
3636
#include "private.h"
37+
#include "convert.h"
3738
#include "zend_alloc.h"
3839

3940

40-
#if SIZEOF_SIZE_T >= 8
41-
# define BC_VECTOR_SIZE 8
42-
/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */
43-
# define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 100000000
44-
#else
45-
# define BC_VECTOR_SIZE 4
46-
/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */
47-
# define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 10000
48-
#endif
49-
50-
/*
51-
* Adding more than this many times may cause uint32_t/uint64_t to overflow.
52-
* Typically this is 1844 for 64bit and 42 for 32bit.
53-
*/
54-
#define BC_VECTOR_NO_OVERFLOW_ADD_COUNT (~((BC_VECTOR) 0) / (BC_VECTOR_BOUNDARY_NUM * BC_VECTOR_BOUNDARY_NUM))
55-
56-
5741
/* Multiply utility routines */
5842

5943
static inline void bc_mul_carry_calc(BC_VECTOR *prod_vector, size_t prod_arr_size)
@@ -64,92 +48,6 @@ static inline void bc_mul_carry_calc(BC_VECTOR *prod_vector, size_t prod_arr_siz
6448
}
6549
}
6650

67-
/* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html.
68-
* This function transforms AABBCCDD into 1000 * AA + 100 * BB + 10 * CC + DD,
69-
* with the caveat that all components must be in the interval [0, 25] to prevent overflow
70-
* due to the multiplication by power of 10 (10 * 25 = 250 is the largest number that fits in a byte).
71-
* The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper
72-
* due to its divide-and-conquer nature.
73-
*/
74-
#if SIZEOF_SIZE_T == 4
75-
static BC_VECTOR bc_parse_chunk_chars(const char *str)
76-
{
77-
BC_VECTOR tmp;
78-
memcpy(&tmp, str, sizeof(tmp));
79-
#if !BC_LITTLE_ENDIAN
80-
tmp = BC_BSWAP(tmp);
81-
#endif
82-
83-
BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8;
84-
BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10;
85-
86-
tmp = lower_digits + upper_digits;
87-
88-
lower_digits = (tmp & 0x00ff0000) >> 16;
89-
upper_digits = (tmp & 0x000000ff) * 100;
90-
91-
return lower_digits + upper_digits;
92-
}
93-
#elif SIZEOF_SIZE_T == 8
94-
static BC_VECTOR bc_parse_chunk_chars(const char *str)
95-
{
96-
BC_VECTOR tmp;
97-
memcpy(&tmp, str, sizeof(tmp));
98-
#if !BC_LITTLE_ENDIAN
99-
tmp = BC_BSWAP(tmp);
100-
#endif
101-
102-
BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8;
103-
BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10;
104-
105-
tmp = lower_digits + upper_digits;
106-
107-
lower_digits = (tmp & 0x00ff000000ff0000) >> 16;
108-
upper_digits = (tmp & 0x000000ff000000ff) * 100;
109-
110-
tmp = lower_digits + upper_digits;
111-
112-
lower_digits = (tmp & 0x0000ffff00000000) >> 32;
113-
upper_digits = (tmp & 0x000000000000ffff) * 10000;
114-
115-
return lower_digits + upper_digits;
116-
}
117-
#endif
118-
119-
/*
120-
* Converts bc_num to BC_VECTOR, going backwards from pointer n by the number of
121-
* characters specified by len.
122-
*/
123-
static inline BC_VECTOR bc_partial_convert_to_vector(const char *n, size_t len)
124-
{
125-
if (len == BC_VECTOR_SIZE) {
126-
return bc_parse_chunk_chars(n - BC_VECTOR_SIZE + 1);
127-
}
128-
129-
BC_VECTOR num = 0;
130-
BC_VECTOR base = 1;
131-
132-
for (size_t i = 0; i < len; i++) {
133-
num += *n * base;
134-
base *= BASE;
135-
n--;
136-
}
137-
138-
return num;
139-
}
140-
141-
static inline void bc_convert_to_vector(BC_VECTOR *n_vector, const char *nend, size_t nlen)
142-
{
143-
size_t i = 0;
144-
while (nlen > 0) {
145-
size_t len = MIN(BC_VECTOR_SIZE, nlen);
146-
n_vector[i] = bc_partial_convert_to_vector(nend, len);
147-
nend -= len;
148-
nlen -= len;
149-
i++;
150-
}
151-
}
152-
15351
/*
15452
* If the n_values of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less,
15553
* the calculation will be performed at high speed without using an array.
@@ -174,52 +72,6 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len,
17472
}
17573
}
17674

177-
#if BC_LITTLE_ENDIAN
178-
# define BC_ENCODE_LUT(A, B) ((A) | (B) << 4)
179-
#else
180-
# define BC_ENCODE_LUT(A, B) ((B) | (A) << 4)
181-
#endif
182-
183-
#define LUT_ITERATE(_, A) \
184-
_(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9)
185-
186-
/* This LUT encodes the decimal representation of numbers 0-100
187-
* such that we can avoid taking modulos and divisions which would be slow. */
188-
static const unsigned char LUT[100] = {
189-
LUT_ITERATE(BC_ENCODE_LUT, 0),
190-
LUT_ITERATE(BC_ENCODE_LUT, 1),
191-
LUT_ITERATE(BC_ENCODE_LUT, 2),
192-
LUT_ITERATE(BC_ENCODE_LUT, 3),
193-
LUT_ITERATE(BC_ENCODE_LUT, 4),
194-
LUT_ITERATE(BC_ENCODE_LUT, 5),
195-
LUT_ITERATE(BC_ENCODE_LUT, 6),
196-
LUT_ITERATE(BC_ENCODE_LUT, 7),
197-
LUT_ITERATE(BC_ENCODE_LUT, 8),
198-
LUT_ITERATE(BC_ENCODE_LUT, 9),
199-
};
200-
201-
static inline unsigned short bc_expand_lut(unsigned char c)
202-
{
203-
return (c & 0x0f) | (c & 0xf0) << 4;
204-
}
205-
206-
/* Writes the character representation of the number encoded in value.
207-
* E.g. if value = 1234, then the string "1234" will be written to str. */
208-
static void bc_write_bcd_representation(uint32_t value, char *str)
209-
{
210-
uint32_t upper = value / 100; /* e.g. 12 */
211-
uint32_t lower = value % 100; /* e.g. 34 */
212-
213-
#if BC_LITTLE_ENDIAN
214-
/* Note: little endian, so `lower` comes before `upper`! */
215-
uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]);
216-
#else
217-
/* Note: big endian, so `upper` comes before `lower`! */
218-
uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]);
219-
#endif
220-
memcpy(str, &digits, sizeof(digits));
221-
}
222-
22375
/*
22476
* Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_VECTOR.
22577
* The array is generated starting with the smaller digits.

0 commit comments

Comments
 (0)