diff --git a/NEWS b/NEWS
index bd7c6b3a26ae0..c8ae220dc8eef 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,8 @@ PHP                                                                        NEWS
 - BCMath:
   . Simplify `bc_divide()` code. (SakiTakamachi)
   . If the result is 0, n_scale is set to 0. (SakiTakamachi)
+  . If the size of the BC_VECTOR array is within 64 bytes, the stack is now
+    used instead of a heap allocation. (SakiTakamachi)
 
 - CLI:
   . Add --ini=diff to print INI settings changed from the builtin default.
diff --git a/ext/bcmath/libbcmath/src/div.c b/ext/bcmath/libbcmath/src/div.c
index ec7619fb77090..87339f8b9c37d 100644
--- a/ext/bcmath/libbcmath/src/div.c
+++ b/ext/bcmath/libbcmath/src/div.c
@@ -260,7 +260,15 @@ static void bc_do_div(
 	size_t quot_arr_size = numerator_arr_size - divisor_arr_size + 1;
 	size_t quot_real_arr_size = MIN(quot_arr_size, (quot_size + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE);
 
-	BC_VECTOR *numerator_vectors = safe_emalloc(numerator_arr_size + divisor_arr_size + quot_arr_size, sizeof(BC_VECTOR), 0);
+	BC_VECTOR stack_vectors[BC_STACK_VECTOR_SIZE];
+	size_t allocation_arr_size = numerator_arr_size + divisor_arr_size + quot_arr_size;
+
+	BC_VECTOR *numerator_vectors;
+	if (allocation_arr_size <= BC_STACK_VECTOR_SIZE) {
+		numerator_vectors = stack_vectors;
+	} else {
+		numerator_vectors = safe_emalloc(allocation_arr_size, sizeof(BC_VECTOR), 0);
+	}
 	BC_VECTOR *divisor_vectors = numerator_vectors + numerator_arr_size;
 	BC_VECTOR *quot_vectors = divisor_vectors + divisor_arr_size;
 
@@ -302,7 +310,9 @@ static void bc_do_div(
 		quot_vectors[i] /= BASE;
 	}
 
-	efree(numerator_vectors);
+	if (allocation_arr_size > BC_STACK_VECTOR_SIZE) {
+		efree(numerator_vectors);
+	}
 }
 
 static inline void bc_divide_by_one(bc_num numerator, bc_num *quot, size_t quot_scale)
diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h
index 1a911442dc9a1..7e972952c75e3 100644
--- a/ext/bcmath/libbcmath/src/private.h
+++ b/ext/bcmath/libbcmath/src/private.h
@@ -64,6 +64,9 @@
 #  define BC_LITTLE_ENDIAN 1
 #endif
 
+/* Element count of the on-stack BC_VECTOR buffer: 8 elements = 64 bytes when BC_VECTOR is 64-bit. */
+#define BC_STACK_VECTOR_SIZE 8
+
 /*
  * Adding more than this many times may cause uint32_t/uint64_t to overflow.
  * Typically this is 1844 for 64bit and 42 for 32bit.
diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c
index de06a4ca037ec..fc7efb37ebf02 100644
--- a/ext/bcmath/libbcmath/src/recmul.c
+++ b/ext/bcmath/libbcmath/src/recmul.c
@@ -149,15 +149,21 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc
 	size_t n2_arr_size = (n2len + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
 	size_t prod_arr_size = (prodlen + BC_VECTOR_SIZE - 1) / BC_VECTOR_SIZE;
 
-	/*
-	 * let's say that N is the max of n1len and n2len (and a multiple of BC_VECTOR_SIZE for simplicity),
-	 * then this sum is <= N/BC_VECTOR_SIZE + N/BC_VECTOR_SIZE + N/BC_VECTOR_SIZE + N/BC_VECTOR_SIZE - 1
-	 * which is equal to N - 1 if BC_VECTOR_SIZE is 4, and N/2 - 1 if BC_VECTOR_SIZE is 8.
-	 */
-	BC_VECTOR *buf = safe_emalloc(n1_arr_size + n2_arr_size + prod_arr_size, sizeof(BC_VECTOR), 0);
+	BC_VECTOR stack_vectors[BC_STACK_VECTOR_SIZE];
+	size_t allocation_arr_size = n1_arr_size + n2_arr_size + prod_arr_size;
 
-	BC_VECTOR *n1_vector = buf;
-	BC_VECTOR *n2_vector = buf + n1_arr_size;
+	BC_VECTOR *n1_vector;
+	if (allocation_arr_size <= BC_STACK_VECTOR_SIZE) {
+		n1_vector = stack_vectors;
+	} else {
+		/*
+		 * let's say that N is the max of n1len and n2len (and a multiple of BC_VECTOR_SIZE for simplicity),
+		 * then this sum is <= N/BC_VECTOR_SIZE + N/BC_VECTOR_SIZE + N/BC_VECTOR_SIZE + N/BC_VECTOR_SIZE - 1
+		 * which is equal to N - 1 if BC_VECTOR_SIZE is 4, and N/2 - 1 if BC_VECTOR_SIZE is 8.
+		 */
+		n1_vector = safe_emalloc(allocation_arr_size, sizeof(BC_VECTOR), 0);
+	}
+	BC_VECTOR *n2_vector = n1_vector + n1_arr_size;
 	BC_VECTOR *prod_vector = n2_vector + n2_arr_size;
 
 	for (i = 0; i < prod_arr_size; i++) {
@@ -188,7 +194,9 @@ static void bc_standard_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, bc
 
 	bc_mul_finish_from_vector(prod_vector, prod_arr_size, prodlen, prod);
 
-	efree(buf);
+	if (allocation_arr_size > BC_STACK_VECTOR_SIZE) {
+		efree(n1_vector);
+	}
 }
 
 /** This is bc_standard_mul implementation for square */