ext/bcmath: Moved macros and added a test

SakiTakamachi · SakiTakamachi · commit 3c9ab6eb71d0 · 2024-07-17T21:48:16.000+09:00
diff --git a/ext/bcmath/libbcmath/src/convert.c b/ext/bcmath/libbcmath/src/convert.c
@@ -61,3 +61,101 @@ char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char
 
 	return dest;
 }
+
+/* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html.
+ * This function transforms AABBCCDD into 1000 * AA + 100 * BB + 10 * CC + DD,
+ * with the caveat that all components must be in the interval [0, 25] to prevent overflow
+ * due to the multiplication by a power of 10 (10 * 25 = 250 is the largest multiple of 10 that fits in a byte).
+ * The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper
+ * due to its divide-and-conquer nature.
+ */
+#if SIZEOF_SIZE_T == 4
+BC_VECTOR bc_parse_chunk_chars(const char *str)
+{
+	BC_VECTOR tmp;
+	memcpy(&tmp, str, sizeof(tmp));
+#if !BC_LITTLE_ENDIAN
+	tmp = BC_BSWAP(tmp);
+#endif
+
+	BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8;
+	BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10;
+
+	tmp = lower_digits + upper_digits;
+
+	lower_digits = (tmp & 0x00ff0000) >> 16;
+	upper_digits = (tmp & 0x000000ff) * 100;
+
+	return lower_digits + upper_digits;
+}
+#elif SIZEOF_SIZE_T == 8
+BC_VECTOR bc_parse_chunk_chars(const char *str)
+{
+	BC_VECTOR tmp;
+	memcpy(&tmp, str, sizeof(tmp));
+#if !BC_LITTLE_ENDIAN
+	tmp = BC_BSWAP(tmp);
+#endif
+
+	BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8;
+	BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10;
+
+	tmp = lower_digits + upper_digits;
+
+	lower_digits = (tmp & 0x00ff000000ff0000) >> 16;
+	upper_digits = (tmp & 0x000000ff000000ff) * 100;
+
+	tmp = lower_digits + upper_digits;
+
+	lower_digits = (tmp & 0x0000ffff00000000) >> 32;
+	upper_digits = (tmp & 0x000000000000ffff) * 10000;
+
+	return lower_digits + upper_digits;
+}
+#endif
+
+#if BC_LITTLE_ENDIAN
+# define BC_ENCODE_LUT(A, B) ((A) | (B) << 4)
+#else
+# define BC_ENCODE_LUT(A, B) ((B) | (A) << 4)
+#endif
+
+#define LUT_ITERATE(_, A) \
+	_(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9)
+
+/* This LUT encodes the decimal representation of numbers 0-99
+ * such that we can avoid taking modulos and divisions which would be slow. */
+static const unsigned char LUT[100] = {
+	LUT_ITERATE(BC_ENCODE_LUT, 0),
+	LUT_ITERATE(BC_ENCODE_LUT, 1),
+	LUT_ITERATE(BC_ENCODE_LUT, 2),
+	LUT_ITERATE(BC_ENCODE_LUT, 3),
+	LUT_ITERATE(BC_ENCODE_LUT, 4),
+	LUT_ITERATE(BC_ENCODE_LUT, 5),
+	LUT_ITERATE(BC_ENCODE_LUT, 6),
+	LUT_ITERATE(BC_ENCODE_LUT, 7),
+	LUT_ITERATE(BC_ENCODE_LUT, 8),
+	LUT_ITERATE(BC_ENCODE_LUT, 9),
+};
+
+static inline unsigned short bc_expand_lut(unsigned char c)
+{
+	return (c & 0x0f) | (c & 0xf0) << 4;
+}
+
+/* Writes the four decimal digits of value (which must be < 10000) to str as raw digit bytes.
+ * E.g. if value = 1234, then the bytes 1, 2, 3, 4 (not ASCII '1'..'4') are written to str, with no terminator. */
+void bc_write_bcd_representation(uint32_t value, char *str)
+{
+	uint32_t upper = value / 100; /* e.g. 12 */
+	uint32_t lower = value % 100; /* e.g. 34 */
+
+#if BC_LITTLE_ENDIAN
+	/* Note: little endian, so `lower` comes before `upper`! */
+	uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]);
+#else
+	/* Note: big endian, so `upper` comes before `lower`! */
+	uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]);
+#endif
+	memcpy(str, &digits, sizeof(digits));
+}
diff --git a/ext/bcmath/libbcmath/src/convert.h b/ext/bcmath/libbcmath/src/convert.h
@@ -14,9 +14,47 @@
    +----------------------------------------------------------------------+
 */
 
+#include "private.h"
+
 #ifndef BCMATH_CONVERT_H
 #define BCMATH_CONVERT_H
 
 char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char *source_end);
+void bc_write_bcd_representation(uint32_t value, char *str);
+BC_VECTOR bc_parse_chunk_chars(const char *str);
+
+/*
+ * Converts len digits of a bc_num into a single BC_VECTOR. n points at the least significant
+ * digit; the digits are read going backwards (towards lower addresses) from n.
+ */
+static inline BC_VECTOR bc_partial_convert_to_vector(const char *n, size_t len)
+{
+	if (len == BC_VECTOR_SIZE) {
+		return bc_parse_chunk_chars(n - BC_VECTOR_SIZE + 1);
+	}
+
+	BC_VECTOR num = 0;
+	BC_VECTOR base = 1;
+
+	for (size_t i = 0; i < len; i++) {
+		num += *n * base;
+		base *= BASE;
+		n--;
+	}
+
+	return num;
+}
+
+static inline void bc_convert_to_vector(BC_VECTOR *n_vector, const char *nend, size_t nlen)
+{
+	size_t i = 0;
+	while (nlen > 0) {
+		size_t len = MIN(BC_VECTOR_SIZE, nlen);
+		n_vector[i] = bc_partial_convert_to_vector(nend, len);
+		nend -= len;
+		nlen -= len;
+		i++;
+	}
+}
 
 #endif
diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h
@@ -84,9 +84,15 @@ static inline uint64_t BC_BSWAP64(uint64_t u)
 #if SIZEOF_SIZE_T >= 8
 #  define BC_BSWAP(u) BC_BSWAP64(u)
    typedef uint64_t BC_VECTOR;
+#  define BC_VECTOR_SIZE 8
+/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */
+#  define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 100000000
 #else
 #  define BC_BSWAP(u) BC_BSWAP32(u)
    typedef uint32_t BC_VECTOR;
+#  define BC_VECTOR_SIZE 4
+/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */
+#  define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 10000
 #endif
 
 #ifdef WORDS_BIGENDIAN
@@ -95,6 +101,12 @@ static inline uint64_t BC_BSWAP64(uint64_t u)
 #  define BC_LITTLE_ENDIAN 1
 #endif
 
+/*
+ * Adding more than this many times may cause uint32_t/uint64_t to overflow.
+ * Typically this is 1844 for 64bit and 42 for 32bit.
+ */
+#define BC_VECTOR_NO_OVERFLOW_ADD_COUNT (~((BC_VECTOR) 0) / (BC_VECTOR_BOUNDARY_NUM * BC_VECTOR_BOUNDARY_NUM))
+
 
 /* routines */
 bcmath_compare_result _bc_do_compare (bc_num n1, bc_num n2, size_t scale, bool use_sign);
diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c
@@ -34,26 +34,10 @@
 #include <assert.h>
 #include <stdbool.h>
 #include "private.h"
+#include "convert.h"
 #include "zend_alloc.h"
 
 
-#if SIZEOF_SIZE_T >= 8
-#  define BC_VECTOR_SIZE 8
-/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */
-#  define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 100000000
-#else
-#  define BC_VECTOR_SIZE 4
-/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */
-#  define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 10000
-#endif
-
-/*
- * Adding more than this many times may cause uint32_t/uint64_t to overflow.
- * Typically this is 1844 for 64bit and 42 for 32bit.
- */
-#define BC_VECTOR_NO_OVERFLOW_ADD_COUNT (~((BC_VECTOR) 0) / (BC_VECTOR_BOUNDARY_NUM * BC_VECTOR_BOUNDARY_NUM))
-
-
 /* Multiply utility routines */
 
 static inline void bc_mul_carry_calc(BC_VECTOR *prod_vector, size_t prod_arr_size)
@@ -64,92 +48,6 @@ static inline void bc_mul_carry_calc(BC_VECTOR *prod_vector, size_t prod_arr_siz
 	}
 }
 
-/* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html.
- * This function transforms AABBCCDD into 1000 * AA + 100 * BB + 10 * CC + DD,
- * with the caveat that all components must be in the interval [0, 25] to prevent overflow
- * due to the multiplication by power of 10 (10 * 25 = 250 is the largest number that fits in a byte).
- * The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper
- * due to its divide-and-conquer nature.
- */
-#if SIZEOF_SIZE_T == 4
-static BC_VECTOR bc_parse_chunk_chars(const char *str)
-{
-	BC_VECTOR tmp;
-	memcpy(&tmp, str, sizeof(tmp));
-#if !BC_LITTLE_ENDIAN
-	tmp = BC_BSWAP(tmp);
-#endif
-
-	BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8;
-	BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10;
-
-	tmp = lower_digits + upper_digits;
-
-	lower_digits = (tmp & 0x00ff0000) >> 16;
-	upper_digits = (tmp & 0x000000ff) * 100;
-
-	return lower_digits + upper_digits;
-}
-#elif SIZEOF_SIZE_T == 8
-static BC_VECTOR bc_parse_chunk_chars(const char *str)
-{
-	BC_VECTOR tmp;
-	memcpy(&tmp, str, sizeof(tmp));
-#if !BC_LITTLE_ENDIAN
-	tmp = BC_BSWAP(tmp);
-#endif
-
-	BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8;
-	BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10;
-
-	tmp = lower_digits + upper_digits;
-
-	lower_digits = (tmp & 0x00ff000000ff0000) >> 16;
-	upper_digits = (tmp & 0x000000ff000000ff) * 100;
-
-	tmp = lower_digits + upper_digits;
-
-	lower_digits = (tmp & 0x0000ffff00000000) >> 32;
-	upper_digits = (tmp & 0x000000000000ffff) * 10000;
-
-	return lower_digits + upper_digits;
-}
-#endif
-
-/*
- * Converts bc_num to BC_VECTOR, going backwards from pointer n by the number of
- * characters specified by len.
- */
-static inline BC_VECTOR bc_partial_convert_to_vector(const char *n, size_t len)
-{
-	if (len == BC_VECTOR_SIZE) {
-		return bc_parse_chunk_chars(n - BC_VECTOR_SIZE + 1);
-	}
-
-	BC_VECTOR num = 0;
-	BC_VECTOR base = 1;
-
-	for (size_t i = 0; i < len; i++) {
-		num += *n * base;
-		base *= BASE;
-		n--;
-	}
-
-	return num;
-}
-
-static inline void bc_convert_to_vector(BC_VECTOR *n_vector, const char *nend, size_t nlen)
-{
-	size_t i = 0;
-	while (nlen > 0) {
-		size_t len = MIN(BC_VECTOR_SIZE, nlen);
-		n_vector[i] = bc_partial_convert_to_vector(nend, len);
-		nend -= len;
-		nlen -= len;
-		i++;
-	}
-}
-
 /*
  * If the n_values of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less,
  * the calculation will be performed at high speed without using an array.
@@ -174,52 +72,6 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len,
 	}
 }
 
-#if BC_LITTLE_ENDIAN
-# define BC_ENCODE_LUT(A, B) ((A) | (B) << 4)
-#else
-# define BC_ENCODE_LUT(A, B) ((B) | (A) << 4)
-#endif
-
-#define LUT_ITERATE(_, A) \
-	_(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9)
-
-/* This LUT encodes the decimal representation of numbers 0-100
- * such that we can avoid taking modulos and divisions which would be slow. */
-static const unsigned char LUT[100] = {
-	LUT_ITERATE(BC_ENCODE_LUT, 0),
-	LUT_ITERATE(BC_ENCODE_LUT, 1),
-	LUT_ITERATE(BC_ENCODE_LUT, 2),
-	LUT_ITERATE(BC_ENCODE_LUT, 3),
-	LUT_ITERATE(BC_ENCODE_LUT, 4),
-	LUT_ITERATE(BC_ENCODE_LUT, 5),
-	LUT_ITERATE(BC_ENCODE_LUT, 6),
-	LUT_ITERATE(BC_ENCODE_LUT, 7),
-	LUT_ITERATE(BC_ENCODE_LUT, 8),
-	LUT_ITERATE(BC_ENCODE_LUT, 9),
-};
-
-static inline unsigned short bc_expand_lut(unsigned char c)
-{
-	return (c & 0x0f) | (c & 0xf0) << 4;
-}
-
-/* Writes the character representation of the number encoded in value.
- * E.g. if value = 1234, then the string "1234" will be written to str. */
-static void bc_write_bcd_representation(uint32_t value, char *str)
-{
-	uint32_t upper = value / 100; /* e.g. 12 */
-	uint32_t lower = value % 100; /* e.g. 34 */
-
-#if BC_LITTLE_ENDIAN
-	/* Note: little endian, so `lower` comes before `upper`! */
-	uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]);
-#else
-	/* Note: big endian, so `upper` comes before `lower`! */
-	uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]);
-#endif
-	memcpy(str, &digits, sizeof(digits));
-}
-
 /*
  * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_VECTOR.
  * The array is generated starting with the smaller digits.
diff --git a/ext/bcmath/tests/bcdiv_by_pow_10.phpt b/ext/bcmath/tests/bcdiv_by_pow_10.phpt