php · nielsdos · Sep 17, 2024 · Aug 16, 2023 · Sep 7, 2024 · Sep 7, 2024
@@ -146,6 +146,14 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale);
 	*(result) = mul_ex;                             \
 } while (0)
 
+bc_num bc_square(bc_num n1, size_t scale);
+
+#define bc_square_ex(n1, result, scale_min) do {	\
+	bc_num square_ex = bc_square(n1, scale_min); \
+	bc_free_num (result);                           \
+	*(result) = square_ex;                             \
+} while (0)
+
 bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, size_t scale);
 
 bool bc_modulo(bc_num num1, bc_num num2, bc_num *resul, size_t scale);

@@ -69,7 +69,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
 	pwrscale = num1->n_scale;
 	while ((exponent & 1) == 0) {
 		pwrscale = 2 * pwrscale;
-		bc_multiply_ex(power, power, &power, pwrscale);
+		bc_square_ex(power, &power, pwrscale);
 		exponent = exponent >> 1;
 	}
 	temp = bc_copy_num(power);
@@ -79,7 +79,7 @@ void bc_raise(bc_num num1, long exponent, bc_num *result, size_t scale)
 	/* Do the calculation. */
 	while (exponent > 0) {
 		pwrscale = 2 * pwrscale;
-		bc_multiply_ex(power, power, &power, pwrscale);
+		bc_square_ex(power, &power, pwrscale);
 		if ((exponent & 1) == 1) {
 			calcscale = pwrscale + calcscale;
 			bc_multiply_ex(temp, power, &temp, calcscale);

@@ -72,6 +72,24 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len,
 	}
 }
 
+static inline void bc_fast_square(bc_num n1, size_t n1len, bc_num *prod)
+{
+	const char *n1end = n1->n_value + n1len - 1;
+
+	BC_VECTOR n1_vector = bc_partial_convert_to_vector(n1end, n1len);
+	BC_VECTOR prod_vector = n1_vector * n1_vector;
+
+	size_t prodlen = n1len + n1len;
+	*prod = bc_new_num_nonzeroed(prodlen, 0);
+	char *pptr = (*prod)->n_value;
+	char *pend = pptr + prodlen - 1;
+
+	while (pend >= pptr) {
+		*pend-- = prod_vector % BASE;
+		prod_vector /= BASE;
+	}
+}
+
 /*
  * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_VECTOR.
  * The array is generated starting with the smaller digits.
@@ -163,7 +181,82 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc
 	efree(buf);
 }
 
-/* The multiply routine.  N2 times N1 is put int PROD with the scale of
+/** This is bc_standard_mul implementation for square */
+static void bc_standard_square(bc_num n1, size_t n1len, bc_num *prod)
+{
+	size_t i;
+	const char *n1end = n1->n_value + n1len - 1;
+	size_t prodlen = n1len + n1len;
+
+	size_t n1_arr_size = (n1len + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
+	size_t prod_arr_size = (prodlen + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
+
+	BC_VECTOR *buf = safe_emalloc(n1_arr_size + n1_arr_size + prod_arr_size, sizeof(BC_VECTOR), 0);
+
+	BC_VECTOR *n1_vector = buf;
+	BC_VECTOR *prod_vector = n1_vector + n1_arr_size + n1_arr_size;
+
+	for (i = 0; i < prod_arr_size; i++) {
+		prod_vector[i] = 0;
+	}
+
+	/* Convert to BC_VECTOR[] */
+	bc_convert_to_vector(n1_vector, n1end, n1len);
+
+	/* Multiplication and addition */
+	size_t count = 0;
+	for (i = 0; i < n1_arr_size; i++) {
+		/*
+		 * This calculation adds the result multiple times to the array entries.
+		 * When multiplying large numbers of digits, there is a possibility of
+		 * overflow, so digit adjustment is performed beforehand.
+		 */
+		if (UNEXPECTED(count >= BC_VECTOR_NO_OVERFLOW_ADD_COUNT)) {
+			bc_mul_carry_calc(prod_vector, prod_arr_size);
+			count = 0;
+		}
+		count++;
+		for (size_t j = 0; j < n1_arr_size; j++) {
+			prod_vector[i + j] += n1_vector[i] * n1_vector[j];
+		}
+	}
+
+	/*
+	 * Move a value exceeding 4/8 digits by carrying to the next digit.
+	 * However, the last digit does nothing.
+	 */
+	bc_mul_carry_calc(prod_vector, prod_arr_size);
+
+	/* Convert to bc_num */
+	*prod = bc_new_num_nonzeroed(prodlen, 0);
+	char *pptr = (*prod)->n_value;
+	char *pend = pptr + prodlen - 1;
+	i = 0;
+	while (i < prod_arr_size - 1) {
+#if BC_VECTOR_SIZE == 4
+		bc_write_bcd_representation(prod_vector[i], pend - 3);
+		pend -= 4;
+#else
+		bc_write_bcd_representation(prod_vector[i] / 10000, pend - 7);
+		bc_write_bcd_representation(prod_vector[i] % 10000, pend - 3);
+		pend -= 8;
+#endif
+		i++;
+	}
+
+	/*
+	 * The last digit may carry over.
+	 * Also need to fill it to the end with zeros, so loop until the end of the string.
+	 */
+	while (pend >= pptr) {
+		*pend-- = prod_vector[i] % BASE;
+		prod_vector[i] /= BASE;
+	}
+
+	efree(buf);
+}
+
+/* The multiply routine. N2 times N1 is put int PROD with the scale of
    the result being MIN(N2 scale+N1 scale, MAX (SCALE, N2 scale, N1 scale)).
    */
 
@@ -194,3 +287,28 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale)
 	}
 	return prod;
 }
+
+bc_num bc_square(bc_num n1, size_t scale)
+{
+	bc_num prod;
+
+	/* Initialize things. */
+	size_t len1 = n1->n_len + n1->n_scale;
+	size_t full_scale = n1->n_scale + n1->n_scale;
+	size_t prod_scale = MIN(full_scale, MAX(scale, n1->n_scale));
+
+	/* Do the square */
+	if (len1 <= BC_VECTOR_SIZE) {
+		bc_fast_square(n1, len1, &prod);
+	} else {
+		bc_standard_square(n1, len1, &prod);
+	}
+
+	/* Assign to prod and clean up the number. */
+	prod->n_sign = PLUS;
+	prod->n_len -= full_scale;
+	prod->n_scale = prod_scale;
+	_bc_rm_leading_zeros(prod);
+
+	return prod;
+}