Skip to content
This repository was archived by the owner on Oct 13, 2020. It is now read-only.

Commit 877c465

Browse files
committed
CDRIVER-1377 bson_ascii_strtoll detects range errors
1 parent 1048f4f commit 877c465

File tree

3 files changed

+171
-165
lines changed

3 files changed

+171
-165
lines changed

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ New features and bug fixes:
2323
as a string and ignored the scope.
2424
* bson_json_reader functions now always validate UTF-8
2525
* JSON parsing now preserves integer width
26+
* bson_ascii_strtoll now matches strtoll: it detects range errors, and when
27+
parsing octal it stops at non-octal digits and returns what it parsed
28+
instead of setting errno
2629
* New flag BSON_VALIDATE_EMPTY_KEYS causes bson_validate to fail if a document
2730
contains zero-length field names.
2831
* The configure option "--enable-hardening" had had no effect. It is removed

src/bson/bson-string.c

Lines changed: 106 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -679,8 +679,8 @@ bson_snprintf (char *str, /* IN */
679679
* valid.
680680
*
681681
* If an invalid value is encountered, errno will be set to EINVAL and
682-
* zero will be returned. This function does not currently detect values
683-
* that are out of range.
682+
* zero will be returned. If the number is out of range, errno is set to
683+
* ERANGE and LLONG_MAX or LLONG_MIN is returned.
684684
*
685685
* Returns:
686686
* The result of the conversion.
@@ -696,123 +696,108 @@ bson_ascii_strtoll (const char *s,
696696
char **e,
697697
int base)
698698
{
699-
char *tok = (char *)s;
700-
char c;
701-
int64_t number = 0;
702-
int64_t sign = 1;
703-
704-
if (!s) {
705-
errno = EINVAL;
706-
return 0;
707-
}
708-
709-
c = *tok;
710-
711-
while (isspace (c)) {
712-
c = *++tok;
713-
}
714-
715-
if (!isdigit (c) && (c != '+') && (c != '-')) {
716-
if (e != NULL) {
717-
*e = tok - 1;
718-
}
719-
errno = EINVAL;
720-
return 0;
721-
}
722-
723-
if (c == '-') {
724-
sign = -1;
725-
c = *++tok;
726-
}
727-
728-
if (c == '+') {
729-
c = *++tok;
730-
}
731-
732-
if (c == '0' && tok[1] != '\0' &&
733-
(isdigit (tok[1]) || tok[1] == 'x' || tok[1] == 'X')) {
734-
/* Hex, octal or decimal */
735-
736-
c = *++tok;
737-
738-
if (c == 'x' || c == 'X') { /* Hex */
739-
if (base != 16 && base != 0) {
740-
if (e != NULL) {
741-
*e = (char *)(s);
742-
}
743-
errno = EINVAL;
744-
return 0;
745-
}
746-
747-
c = *++tok;
748-
if (!isxdigit (c)) {
749-
if (e != NULL) {
750-
*e = tok;
751-
}
752-
errno = EINVAL;
753-
return 0;
754-
}
755-
do {
756-
number = (number << 4) + (c - '0');
757-
c = *(++tok);
758-
} while (isxdigit (c));
759-
}
760-
else { /* Octal or Decimal -- prefixed with 0 */
761-
if (base != 8 && base != 0 && base != 10) {
762-
if (e != NULL) {
763-
*e = (char *)(s);
764-
}
765-
errno = EINVAL;
766-
return 0;
767-
}
768-
if (base == 10) {
769-
do {
770-
number = (number * 10) + (c - '0');
771-
c = *(++tok);
772-
} while (isdigit (c));
773-
} else { /*Octal*/
774-
if ( c < '0' || c >= '8') {
775-
if (e != NULL) {
776-
*e = tok;
777-
}
778-
errno = EINVAL;
779-
return 0;
780-
}
781-
do {
782-
number = (number << 3) + (c - '0');
783-
c = *(++tok);
784-
} while (('0' <= c) && (c < '8'));
785-
}
786-
}
787-
788-
while (c == 'l' || c == 'L' || c == 'u' || c == 'U') {
789-
c = *++tok;
790-
}
791-
}
792-
else {
793-
/* Decimal */
794-
if (base != 10) {
795-
if (e != NULL) {
796-
*e = (char *)(s);
797-
}
798-
errno = EINVAL;
799-
return 0;
800-
}
801-
802-
do {
803-
number = (number * 10) + (c - '0');
804-
c = *(++tok);
805-
} while (isdigit (c));
806-
807-
while (c == 'l' || c == 'L' || c == 'u' || c == 'U') {
808-
c = *(++tok);
809-
}
810-
}
811-
812-
if (e != NULL) {
813-
*e = tok;
814-
}
815-
errno = 0;
816-
return (sign * number);
817-
}
699+
char *tok = (char *) s;
700+
char *digits_start;
701+
char c;
702+
int64_t number = 0;
703+
int64_t sign = 1;
704+
int64_t cutoff;
705+
int64_t cutlim;
706+
707+
errno = 0;
708+
709+
if (!s) {
710+
errno = EINVAL;
711+
return 0;
712+
}
713+
714+
c = *tok;
715+
716+
while (isspace (c)) {
717+
c = *++tok;
718+
}
719+
720+
if (c == '-') {
721+
sign = -1;
722+
c = *++tok;
723+
} else if (c == '+') {
724+
c = *++tok;
725+
} else if (!isdigit (c)) {
726+
errno = EINVAL;
727+
return 0;
728+
}
729+
730+
/* from here down, inspired by NetBSD's strtoll */
731+
if ((base == 0 || base == 16) && c == '0' &&
732+
(tok[1] == 'x' || tok[1] == 'X')) {
733+
tok += 2;
734+
c = *tok;
735+
base = 16;
736+
}
737+
738+
if (base == 0) {
739+
base = c == '0' ? 8 : 10;
740+
}
741+
742+
/* Cutoff is the greatest magnitude we'll be able to multiply by base without
743+
* range error. If the current number is past cutoff and we see a valid digit,
744+
* fail. If the number is *equal* to cutoff, then the next digit must be less
745+
* than or equal to cutlim, otherwise fail.
746+
*/
747+
cutoff = sign == -1 ? LLONG_MIN : LLONG_MAX;
748+
cutlim = (int) (cutoff % base);
749+
cutoff /= base;
750+
if (sign == -1) {
751+
if (cutlim > 0) {
752+
cutlim -= base;
753+
cutoff += 1;
754+
}
755+
cutlim = -cutlim;
756+
}
818757

758+
digits_start = tok;
759+
760+
while ((c = *tok)) {
761+
if (isdigit (c)) {
762+
c -= '0';
763+
} else if (isalpha (c)) {
764+
c -= isupper (c) ? 'A' - 10 : 'a' - 10;
765+
} else {
766+
/* end of number string */
767+
break;
768+
}
769+
770+
if (c >= base) {
771+
break;
772+
}
773+
774+
if (sign == -1) {
775+
if (number < cutoff || (number == cutoff && c > cutlim)) {
776+
number = LLONG_MIN;
777+
errno = ERANGE;
778+
break;
779+
} else {
780+
number *= base;
781+
number -= c;
782+
}
783+
} else {
784+
if (number > cutoff || (number == cutoff && c > cutlim)) {
785+
number = LLONG_MAX;
786+
errno = ERANGE;
787+
break;
788+
} else {
789+
number *= base;
790+
number += c;
791+
}
792+
}
793+
794+
tok++;
795+
}
796+
797+
/* did we parse any digits at all? */
798+
if (e != NULL && tok > digits_start) {
799+
*e = tok;
800+
}
801+
802+
return number;
803+
}

tests/test-string.c

Lines changed: 62 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -192,69 +192,87 @@ typedef struct
192192
const char *str;
193193
int base;
194194
int64_t rv;
195+
const char *remaining;
195196
int _errno;
196197
} strtoll_test;
197198

198199

199200
static void
200201
test_bson_ascii_strtoll (void)
201202
{
203+
#ifdef END
204+
#undef END
205+
#endif
206+
#define END ""
202207
int64_t rv;
203208
int i;
209+
char *endptr;
204210
strtoll_test tests[] = {
205-
/* input, base, expected output, expected errno */
206-
{ "1", 10, 1, 0 },
207-
{ "+1", 10, 1, 0 },
208-
{ "-1", 10, -1, 0 },
209-
{ "0", 10, 0, 0 },
210-
{ "0 ", 10, 0, 0 },
211-
{ " 0 ", 10, 0, 0 },
212-
{ " 0", 10, 0, 0 },
213-
{ " 0\"", 10, 0, 0 },
214-
{ "0l", 10, 0, 0 },
215-
{ "0l ", 10, 0, 0 },
216-
{ "0u", 10, 0, 0 },
217-
{ "0u ", 10, 0, 0 },
218-
{ "0L", 10, 0, 0 },
219-
{ "0L ", 10, 0, 0 },
220-
{ "0U", 10, 0, 0 },
221-
{ "0U ", 10, 0, 0 },
222-
{ "-0", 10, 0, 0 },
223-
{ "+0", 10, 0, 0 },
224-
{ "010", 8, 8, 0 },
225-
{ "08", 8, 0, EINVAL },
226-
{ "010", 10, 10, 0 },
227-
{ "010", 8, 8, 0 },
228-
{ "010", 0, 8, 0 },
229-
{ "68719476736", 10, 68719476736, 0 },
230-
{ "-68719476736", 10, -68719476736, 0 },
231-
{ "+68719476736", 10, 68719476736, 0 },
232-
{ " 68719476736 ", 10, 68719476736, 0 },
233-
{ " -68719476736 ", 10, -68719476736, 0 },
234-
{ " 4611686018427387904LL", 10, 4611686018427387904LL, 0 },
235-
{ " -4611686018427387904LL ", 10, -4611686018427387904LL, 0 },
236-
{ "0x1000000000", 16, 68719476736, 0 },
237-
{ "0x1000000000", 0, 68719476736, 0 },
238-
{ "-0x1000000000", 16, -68719476736, 0 },
239-
{ "+0x1000000000", 16, 68719476736, 0 },
240-
{ "01234", 8, 668, 0 },
241-
{ "-01234", 8, -668, 0 },
242-
{ "+01234", 8, 668, 0 },
211+
/* input, base, expected output, expected remaining (unparsed) input, expected errno */
212+
{ "1", 10, 1, END, 0 },
213+
{ "+1", 10, 1, END, 0 },
214+
{ "-1", 10, -1, END, 0 },
215+
{ "0", 10, 0, END, 0 },
216+
{ "0 ", 10, 0, " ", 0 },
217+
{ " 0 ", 10, 0, " ", 0 },
218+
{ " 0", 10, 0, END, 0 },
219+
{ " 0\"", 10, 0, "\"", 0 },
220+
{ "0l", 10, 0, "l", 0 },
221+
{ "0l ", 10, 0, "l ", 0 },
222+
{ "0u", 10, 0, "u", 0 },
223+
{ "0u ", 10, 0, "u ", 0 },
224+
{ "0L", 10, 0, "L", 0 },
225+
{ "0L ", 10, 0, "L ", 0 },
226+
{ "0U", 10, 0, "U", 0 },
227+
{ "0U ", 10, 0, "U ", 0 },
228+
{ "-0", 10, 0, END, 0 },
229+
{ "+0", 10, 0, END, 0 },
230+
{ "010", 8, 8, END, 0 },
231+
/* strtoll "takes as many characters as possible to form a valid base-n
232+
* integer", so it ignores "8" and returns 0 */
233+
{ "08", 0, 0, "8", 0 },
234+
{ "010", 10, 10, END, 0 },
235+
{ "010", 8, 8, END, 0 },
236+
{ "010", 0, 8, END, 0 },
237+
{ "68719476736", 10, 68719476736, END, 0 },
238+
{ "-68719476736", 10, -68719476736, END, 0 },
239+
{ "+68719476736", 10, 68719476736, END, 0 },
240+
{ " 68719476736 ", 10, 68719476736, " ", 0 },
241+
{ " 68719476736 ", 0, 68719476736, " ", 0 },
242+
{ " -68719476736 ", 10, -68719476736, " ", 0 },
243+
{ " -68719476736 ", 0, -68719476736, " ", 0 },
244+
{ " 4611686018427387904LL", 10, 4611686018427387904LL, "LL", 0 },
245+
{ " -4611686018427387904LL ", 10, -4611686018427387904LL, "LL ", 0 },
246+
{ "0x1000000000", 16, 68719476736, END, 0 },
247+
{ "0x1000000000", 0, 68719476736, END, 0 },
248+
{ "-0x1000000000", 16, -68719476736, END, 0 },
249+
{ "-0x1000000000", 0, -68719476736, END, 0 },
250+
{ "+0x1000000000", 16, 68719476736, END, 0 },
251+
{ "+0x1000000000", 0, 68719476736, END, 0 },
252+
{ "01234", 8, 668, END, 0 },
253+
{ "01234", 0, 668, END, 0 },
254+
{ "-01234", 8, -668, END, 0 },
255+
{ "-01234", 0, -668, END, 0 },
256+
{ "+01234", 8, 668, END, 0 },
257+
{ "+01234", 0, 668, END, 0 },
258+
{ "9223372036854775807", 10, LLONG_MAX, END, 0},
259+
{ "-9223372036854775808", 10, LLONG_MIN, END, 0},
260+
{ "9223372036854775808", 10, LLONG_MAX, "8", ERANGE}, /* LLONG_MAX+1 */
261+
{ "-9223372036854775809", 10, LLONG_MIN, "9", ERANGE}, /* LLONG_MIN-1 */
262+
{ "18446744073709551615", 10, LLONG_MAX, "5", ERANGE}, /* 2*LLONG_MAX+1 */
263+
{ "-18446744073709551618", 10, LLONG_MIN, "8", ERANGE},/* 2*LLONG_MIN-2 */
243264
{ NULL }
244265
};
245266

246267
for (i = 0; tests [i].str; i++) {
247268
errno = 0;
248269

249-
rv = bson_ascii_strtoll (tests [i].str, NULL, tests [i].base);
250-
251-
#if 0
252-
fprintf (stderr, "rv=%"PRId64" errno=%d\n", rv, errno);
253-
#endif
254-
270+
rv = bson_ascii_strtoll (tests [i].str, &endptr, tests [i].base);
255271
assert_cmpint (rv, ==, tests [i].rv);
256272
assert_cmpint (errno, ==, tests [i]._errno);
273+
assert_cmpstr (endptr, tests [i].remaining);
257274
}
275+
#undef END
258276
}
259277

260278

0 commit comments

Comments
 (0)