From 30e1f29b4ba0c524288a50f975a9f2963f67bd0a Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Fri, 14 Mar 2025 23:03:09 +0900 Subject: [PATCH 1/4] WIP --- ext/bcmath/libbcmath/src/str2num.c | 95 +++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/ext/bcmath/libbcmath/src/str2num.c b/ext/bcmath/libbcmath/src/str2num.c index bd9a44a240503..7d252a51db5d6 100644 --- a/ext/bcmath/libbcmath/src/str2num.c +++ b/ext/bcmath/libbcmath/src/str2num.c @@ -105,6 +105,96 @@ static inline const char *bc_skip_zero_reverse(const char *scanner, const char * return scanner; } +static bool bc_scientific_notation_str2num( + bc_num *num, const char *str, const char *end, const char *integer_ptr, const char *fractional_ptr, const char *exponent_ptr, + size_t digits, size_t *full_scale) +{ + const char *fractional_end = exponent_ptr; + + /* In scientific notation, the mantissa always has one integer digit. */ + if (UNEXPECTED(digits != 1)) { + goto fail; + } + + /* Must be 1 <= mantissa < 10 */ + if (UNEXPECTED(*integer_ptr == 0)) { + goto fail; + } + + if (UNEXPECTED(*exponent_ptr != 'e' && *exponent_ptr != 'E')) { + goto fail; + } + exponent_ptr++; + + sign exponent_sign = PLUS; + if (*exponent_ptr == '+') { + /* Skip Sign */ + exponent_ptr++; + } else if (*exponent_ptr == '-') { + exponent_sign = MINUS; + exponent_ptr++; + } + + /* Skip exponent leading zeros. This is rare, so don't do bulk processing. */ + while (*exponent_ptr == '0') { + exponent_ptr++; + } + + const char *exponent_end = bc_count_digits(exponent_ptr, end); + if (UNEXPECTED(*exponent_end != '\0')) { + /* invalid num */ + goto fail; + } + + size_t exponent = 0; + while (exponent_ptr < exponent_end) { + exponent = exponent * 10 + (*exponent_ptr - '0'); /* TODO: check overflow */ + exponent_ptr++; + } + + size_t str_scale = fractional_end - fractional_ptr; + + if (exponent_sign == PLUS) { + digits += exponent; + str_scale = str_scale > exponent ? 
str_scale - exponent : 0; + + *num = bc_new_num_nonzeroed(digits, str_scale); + (*num)->n_sign = *str == '-' ? MINUS : PLUS; + char *nptr = (*num)->n_value; + char *nend = nptr + digits + str_scale; + + *nptr++ = *integer_ptr - '0'; + nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end); + while (nptr < nend) { + *nptr++ = 0; + } + } else { + digits = 0; + str_scale += exponent; + + *num = bc_new_num_nonzeroed(1, str_scale); // 1 is for 0 + (*num)->n_sign = *str == '-' ? MINUS : PLUS; + char *nptr = (*num)->n_value; + + for (size_t i = 0; i < exponent; i++) { + *nptr++ = 0; + } + + *nptr++ = *integer_ptr - '0'; + nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end); + } + + if (full_scale) { + *full_scale = str_scale; + } + + return true; + +fail: + *num = bc_copy_num(BCG(_zero_)); + return false; +} + /* Assumes `num` points to NULL, i.e. does yet not hold a number. */ bool bc_str2num(bc_num *num, const char *str, const char *end, size_t scale, size_t *full_scale, bool auto_scale) { @@ -151,9 +241,8 @@ bool bc_str2num(bc_num *num, const char *str, const char *end, size_t scale, siz /* validate */ fractional_end = bc_count_digits(fractional_ptr, end); - if (UNEXPECTED(*fractional_end != '\0')) { - /* invalid num */ - goto fail; + if (*fractional_end != '\0') { + return bc_scientific_notation_str2num(num, str, end, integer_ptr, fractional_ptr, fractional_end, digits, full_scale); } if (full_scale) { From dfa260b2a11cdd640f588b18a93244372033236e Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Sun, 16 Mar 2025 17:30:49 +0900 Subject: [PATCH 2/4] Removed the restriction on the integer part of the notation --- ext/bcmath/libbcmath/src/str2num.c | 56 ++++++++++++++---------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/ext/bcmath/libbcmath/src/str2num.c b/ext/bcmath/libbcmath/src/str2num.c index 7d252a51db5d6..635d2ccc675c8 100644 --- a/ext/bcmath/libbcmath/src/str2num.c +++ b/ext/bcmath/libbcmath/src/str2num.c @@ 
-111,16 +111,6 @@ static bool bc_scientific_notation_str2num( { const char *fractional_end = exponent_ptr; - /* In scientific notation, the mantissa always has one integer digit. */ - if (UNEXPECTED(digits != 1)) { - goto fail; - } - - /* Must be 1 <= mantissa < 10 */ - if (UNEXPECTED(*integer_ptr == 0)) { - goto fail; - } - if (UNEXPECTED(*exponent_ptr != 'e' && *exponent_ptr != 'E')) { goto fail; } @@ -152,36 +142,44 @@ static bool bc_scientific_notation_str2num( exponent_ptr++; } + const char *integer_end = integer_ptr + digits; + size_t str_scale = fractional_end - fractional_ptr; + size_t str_full_len = digits + str_scale; + size_t leading_zero_paddings = 0; if (exponent_sign == PLUS) { digits += exponent; - str_scale = str_scale > exponent ? str_scale - exponent : 0; - - *num = bc_new_num_nonzeroed(digits, str_scale); - (*num)->n_sign = *str == '-' ? MINUS : PLUS; - char *nptr = (*num)->n_value; - char *nend = nptr + digits + str_scale; - - *nptr++ = *integer_ptr - '0'; - nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end); - while (nptr < nend) { - *nptr++ = 0; + if (digits == 0) { + leading_zero_paddings = 1; } + str_scale = str_scale > exponent ? str_scale - exponent : 0; } else { - digits = 0; str_scale += exponent; + if (digits > exponent) { + digits -= exponent; + } else { + leading_zero_paddings = exponent - digits + 1; /* 1 is for integer part */ + digits = 0; + } + } - *num = bc_new_num_nonzeroed(1, str_scale); // 1 is for 0 - (*num)->n_sign = *str == '-' ? MINUS : PLUS; - char *nptr = (*num)->n_value; + *num = bc_new_num_nonzeroed(digits > 0 ? digits : 1, str_scale); /* 1 is for 0 */ + (*num)->n_sign = *str == '-' ?
MINUS : PLUS; + char *nptr = (*num)->n_value; - for (size_t i = 0; i < exponent; i++) { + for (size_t i = 0; i < leading_zero_paddings; i++) { + *nptr++ = 0; + } + + nptr = bc_copy_and_toggle_bcd(nptr, integer_ptr, integer_end); + nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end); + + if (digits > str_full_len) { + /* Fill the rest integer part with zeros */ + for (size_t i = 0; i < digits - str_full_len; i++) { *nptr++ = 0; } - - *nptr++ = *integer_ptr - '0'; - nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end); } if (full_scale) { From fbe76fc95a8d9a375512290098c467f951c5232a Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Sun, 16 Mar 2025 17:36:23 +0900 Subject: [PATCH 3/4] Added handling for scientific notation when there is no fractional part --- ext/bcmath/libbcmath/src/str2num.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/ext/bcmath/libbcmath/src/str2num.c b/ext/bcmath/libbcmath/src/str2num.c index 635d2ccc675c8..6bb9dec45cbcd 100644 --- a/ext/bcmath/libbcmath/src/str2num.c +++ b/ext/bcmath/libbcmath/src/str2num.c @@ -109,7 +109,7 @@ static bool bc_scientific_notation_str2num( bc_num *num, const char *str, const char *end, const char *integer_ptr, const char *fractional_ptr, const char *exponent_ptr, size_t digits, size_t *full_scale) { - const char *fractional_end = fractional_ptr != NULL ?
exponent_ptr : NULL; if (UNEXPECTED(*exponent_ptr != 'e' && *exponent_ptr != 'E')) { goto fail; @@ -173,7 +173,9 @@ static bool bc_scientific_notation_str2num( } nptr = bc_copy_and_toggle_bcd(nptr, integer_ptr, integer_end); - nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end); + if (fractional_ptr != NULL) { + nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end); + } if (digits > str_full_len) { /* Fill the rest integer part with zeros */ @@ -221,8 +223,8 @@ bool bc_str2num(bc_num *num, const char *str, const char *end, size_t scale, siz const char *decimal_point = (*ptr == '.') ? ptr : NULL; /* If a non-digit and non-decimal-point indicator is in the string, i.e. an invalid character */ - if (UNEXPECTED(!decimal_point && *ptr != '\0')) { - goto fail; + if (!decimal_point && *ptr != '\0') { + return bc_scientific_notation_str2num(num, str, end, integer_ptr, fractional_ptr, ptr, digits, full_scale); } /* search and validate fractional end if exists */ @@ -311,8 +313,4 @@ bool bc_str2num(bc_num *num, const char *str, const char *end, size_t scale, siz zero: *num = bc_copy_num(BCG(_zero_)); return true; - -fail: - *num = bc_copy_num(BCG(_zero_)); - return false; } From e3fa32c560de1a746192361b5a3f8f28d705856a Mon Sep 17 00:00:00 2001 From: Saki Takamachi Date: Sun, 16 Mar 2025 17:42:18 +0900 Subject: [PATCH 4/4] Added removing fraction trailing zeros --- ext/bcmath/libbcmath/src/str2num.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ext/bcmath/libbcmath/src/str2num.c b/ext/bcmath/libbcmath/src/str2num.c index 6bb9dec45cbcd..fe9e9a500333c 100644 --- a/ext/bcmath/libbcmath/src/str2num.c +++ b/ext/bcmath/libbcmath/src/str2num.c @@ -142,6 +142,13 @@ static bool bc_scientific_notation_str2num( exponent_ptr++; } + if (fractional_end != NULL) { + /* Skip fraction trailing zeros. This is rare, so don't do bulk processing. 
*/ + while (fractional_end[-1] == '0') { + fractional_end--; + } + } + const char *integer_end = integer_ptr + digits; size_t str_scale = fractional_end - fractional_ptr;