From d58fbe574cfffbe7a50a375598589218070abba7 Mon Sep 17 00:00:00 2001
From: Saki Takamachi <saki@sakiot.com>
Date: Fri, 10 May 2024 21:30:22 +0900
Subject: [PATCH 1/5] The basic structure of the code was changed to the same
 as sub

---
 ext/bcmath/libbcmath/src/doaddsub.c | 82 +++++++++++++----------------
 1 file changed, 37 insertions(+), 45 deletions(-)

diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c
index eb2f7b6645d7a..ff24cc8a6bea0 100644
--- a/ext/bcmath/libbcmath/src/doaddsub.c
+++ b/ext/bcmath/libbcmath/src/doaddsub.c
@@ -41,75 +41,67 @@
 bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
 {
 	bc_num sum;
-	size_t sum_scale, sum_digits;
+	size_t sum_len = MAX(n1->n_len, n2->n_len) + 1;
+	size_t sum_scale = MAX(n1->n_scale, n2->n_scale);
+	size_t min_len = MIN (n1->n_len, n2->n_len);
+	size_t min_scale = MIN(n1->n_scale, n2->n_scale);
+	size_t min_bytes = min_len + min_scale;
 	char *n1ptr, *n2ptr, *sumptr;
-	size_t n1bytes, n2bytes;
-	bool carry;
+	bool carry = 0;
+	size_t count;
 
 	/* Prepare sum. */
-	sum_scale = MAX (n1->n_scale, n2->n_scale);
-	sum_digits = MAX (n1->n_len, n2->n_len) + 1;
-	sum = bc_new_num (sum_digits, MAX(sum_scale, scale_min));
+	sum = bc_new_num (sum_len, MAX(sum_scale, scale_min));
 
 	/* Start with the fraction part.  Initialize the pointers. */
-	n1bytes = n1->n_scale;
-	n2bytes = n2->n_scale;
-	n1ptr = (char *) (n1->n_value + n1->n_len + n1bytes - 1);
-	n2ptr = (char *) (n2->n_value + n2->n_len + n2bytes - 1);
-	sumptr = (char *) (sum->n_value + sum_scale + sum_digits - 1);
+	n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1);
+	n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1);
+	sumptr = (char *) (sum->n_value + sum_scale + sum_len - 1);
 
 	/* Add the fraction part.  First copy the longer fraction.*/
-	if (n1bytes != n2bytes) {
-		if (n1bytes > n2bytes) {
-			while (n1bytes > n2bytes) {
-				*sumptr-- = *n1ptr--;
-				n1bytes--;
-			}
-		} else {
-			while (n2bytes > n1bytes) {
-				*sumptr-- = *n2ptr--;
-				n2bytes--;
-			}
+	if (n1->n_scale != min_scale) {
+		/* n1 has the longer scale */
+		for (count = n1->n_scale - min_scale; count > 0; count--) {
+			*sumptr-- = *n1ptr--;
+		}
+	} else {
+		/* n2 has the longer scale */
+		for (count = n2->n_scale - min_scale; count > 0; count--) {
+			*sumptr-- = *n2ptr--;
 		}
 	}
 
 	/* Now add the remaining fraction part and equal size integer parts. */
-	n1bytes += n1->n_len;
-	n2bytes += n2->n_len;
-	carry = 0;
-	while ((n1bytes > 0) && (n2bytes > 0)) {
+	for (count = 0; count < min_bytes; count++) {
 		*sumptr = *n1ptr-- + *n2ptr-- + carry;
-		if (*sumptr > (BASE - 1)) {
-			carry = 1;
+		if (*sumptr >= BASE) {
 			*sumptr -= BASE;
+			carry = 1;
 		} else {
 			carry = 0;
 		}
 		sumptr--;
-		n1bytes--;
-		n2bytes--;
 	}
 
 	/* Now add carry the longer integer part. */
-	if (n1bytes == 0) {
-		n1bytes = n2bytes;
-		n1ptr = n2ptr;
-	}
-	while (n1bytes-- > 0) {
-		*sumptr = *n1ptr-- + carry;
-		if (*sumptr > (BASE - 1)) {
-			carry = true;
-			*sumptr -= BASE;
-		} else {
-			carry = false;
+	if (sum_len != min_len) {
+		if (n2->n_len > n1->n_len) {
+			n1ptr = n2ptr;
+		}
+		for (count = sum_len - min_len; count > 0; count--) {
+			*sumptr = *n1ptr-- + carry;
+			if (*sumptr >= BASE) {
+				*sumptr -= BASE;
+				carry = 1;
+			} else {
+				carry = 0;
+			}
+			sumptr--;
 		}
-		sumptr--;
 	}
 
 	/* Set final carry. */
-	if (carry) {
-		*sumptr += 1;
-	}
+	*sumptr += carry;
 
 	/* Adjust sum and return. */
 	_bc_rm_leading_zeros(sum);

From 09352132100a3a2e1120af3631932d6ab538df91 Mon Sep 17 00:00:00 2001
From: Saki Takamachi <saki@sakiot.com>
Date: Fri, 10 May 2024 23:02:49 +0900
Subject: [PATCH 2/5] use SIMD

---
 ext/bcmath/libbcmath/src/doaddsub.c | 52 ++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c
index ff24cc8a6bea0..f68de54ecd05b 100644
--- a/ext/bcmath/libbcmath/src/doaddsub.c
+++ b/ext/bcmath/libbcmath/src/doaddsub.c
@@ -72,7 +72,57 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
 	}
 
 	/* Now add the remaining fraction part and equal size integer parts. */
-	for (count = 0; count < min_bytes; count++) {
+	count = 0;
+	/* Uses SWAR (SIMD within a register) to add sizeof(BC_UINT_T) digits per iteration. */
+	if (min_bytes >= sizeof(BC_UINT_T)) {
+		sumptr++;
+		n1ptr++;
+		n2ptr++;
+		while (count + sizeof(BC_UINT_T) <= min_bytes) {
+			sumptr -= sizeof(BC_UINT_T);
+			n1ptr -= sizeof(BC_UINT_T);
+			n2ptr -= sizeof(BC_UINT_T);
+
+			BC_UINT_T n1bytes;
+			BC_UINT_T n2bytes;
+			memcpy(&n1bytes, n1ptr, sizeof(n1bytes));
+			memcpy(&n2bytes, n2ptr, sizeof(n2bytes));
+
+#if BC_LITTLE_ENDIAN
+			/* Little endian requires changing the order of bytes. */
+			n1bytes = BC_BSWAP(n1bytes);
+			n2bytes = BC_BSWAP(n2bytes);
+#endif
+
+			n1bytes += SWAR_REPEAT(0xF6) + n2bytes + carry;
+			/* If the most significant bit is 1, a carry down has occurred. */
+			carry = !(n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1)));
+
+			/*
+			 * Check the most significant bit of each of the bytes, and if it is 1, a carry down has
+			 * occurred. When carrying down occurs, due to the difference between decimal and hexadecimal
+			 * numbers, an extra 6 is added to the lower 4 bits.
+			 * Therefore, for a byte that has been carried down, set all the upper 4 bits to 0 and subtract
+			 * 6 from the lower 4 bits to adjust it to the correct value as a decimal number.
+			 */
+			BC_UINT_T sum_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0xF6;
+			n1bytes -= sum_mask;
+
+#if BC_LITTLE_ENDIAN
+			/* Little endian requires changing the order of bytes back. */
+			n1bytes = BC_BSWAP(n1bytes);
+#endif
+
+			memcpy(sumptr, &n1bytes, sizeof(n1bytes));
+
+			count += sizeof(BC_UINT_T);
+		}
+		sumptr--;
+		n1ptr--;
+		n2ptr--;
+	}
+
+	for (; count < min_bytes; count++) {
 		*sumptr = *n1ptr-- + *n2ptr-- + carry;
 		if (*sumptr >= BASE) {
 			*sumptr -= BASE;

From aa80c0d2224f4263ced7706748a191afbee530bf Mon Sep 17 00:00:00 2001
From: Saki Takamachi <saki@sakiot.com>
Date: Fri, 10 May 2024 23:22:01 +0900
Subject: [PATCH 3/5] fixed comments

---
 ext/bcmath/libbcmath/src/doaddsub.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c
index f68de54ecd05b..70897a40d67c7 100644
--- a/ext/bcmath/libbcmath/src/doaddsub.c
+++ b/ext/bcmath/libbcmath/src/doaddsub.c
@@ -94,16 +94,22 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
 			n2bytes = BC_BSWAP(n2bytes);
 #endif
 
+			/*
+			 * In order to add 1 to the "next digit" when a carry occurs, add 0xF6 to each byte
+			 * so that the byte overflows when the digit sum reaches 10.
+			 * e.g.
+			 * 00001001(9) + 00000001(1) = 00001010(10) becomes
+			 * 11111111(9 + 0xF6) + 00000001(1) = 00000000(0) and carry 1
+			 */
 			n1bytes += SWAR_REPEAT(0xF6) + n2bytes + carry;
-			/* If the most significant bit is 1, a carry down has occurred. */
+			/* If the most significant bit is 0, a carry has occurred. */
 			carry = !(n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1)));
 
 			/*
-			 * Check the most significant bit of each of the bytes, and if it is 1, a carry down has
-			 * occurred. When carrying down occurs, due to the difference between decimal and hexadecimal
-			 * numbers, an extra 6 is added to the lower 4 bits.
-			 * Therefore, for a byte that has been carried down, set all the upper 4 bits to 0 and subtract
-			 * 6 from the lower 4 bits to adjust it to the correct value as a decimal number.
+			 * The calculation result is a mixture of bytes that produced a carry and bytes that did not.
+			 * The most significant bit of each byte is 0 if that byte carried, and 1 if it did not.
+			 * Using this, subtract the 0xF6 added for adjustment from every byte that did not carry,
+			 * returning it to the correct value as a decimal digit.
 			 */
 			BC_UINT_T sum_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0xF6;
 			n1bytes -= sum_mask;

From 64eb41a99802823a4d6be75a2bd657dbc199b8f3 Mon Sep 17 00:00:00 2001
From: Saki Takamachi <34942839+SakiTakamachi@users.noreply.github.com>
Date: Sat, 11 May 2024 07:53:30 +0900
Subject: [PATCH 4/5] fix compare length

---
 ext/bcmath/libbcmath/src/doaddsub.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c
index 70897a40d67c7..3b27ea8b1ef9f 100644
--- a/ext/bcmath/libbcmath/src/doaddsub.c
+++ b/ext/bcmath/libbcmath/src/doaddsub.c
@@ -140,11 +140,11 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
 	}
 
 	/* Now add carry the longer integer part. */
-	if (sum_len != min_len) {
+	if (sum_len - 1 != min_len) {
 		if (n2->n_len > n1->n_len) {
 			n1ptr = n2ptr;
 		}
-		for (count = sum_len - min_len; count > 0; count--) {
+		for (count = sum_len - 1 - min_len; count > 0; count--) {
 			*sumptr = *n1ptr-- + carry;
 			if (*sumptr >= BASE) {
 				*sumptr -= BASE;

From e07dd7c2cbcce7e180324f237cc1779811f9b789 Mon Sep 17 00:00:00 2001
From: Saki Takamachi <saki@sakiot.com>
Date: Sat, 11 May 2024 08:42:32 +0900
Subject: [PATCH 5/5] optimize

---
 ext/bcmath/libbcmath/src/doaddsub.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ext/bcmath/libbcmath/src/doaddsub.c b/ext/bcmath/libbcmath/src/doaddsub.c
index 3b27ea8b1ef9f..e6bedd57ea08e 100644
--- a/ext/bcmath/libbcmath/src/doaddsub.c
+++ b/ext/bcmath/libbcmath/src/doaddsub.c
@@ -140,11 +140,11 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
 	}
 
 	/* Now add carry the longer integer part. */
-	if (sum_len - 1 != min_len) {
+	if (n1->n_len != n2->n_len) {
 		if (n2->n_len > n1->n_len) {
 			n1ptr = n2ptr;
 		}
-		for (count = sum_len - 1 - min_len; count > 0; count--) {
+		for (count = sum_len - min_len; count > 1; count--) {
 			*sumptr = *n1ptr-- + carry;
 			if (*sumptr >= BASE) {
 				*sumptr -= BASE;