Skip to content

Commit 3f76f94

Browse files
committed
Fix double-free on invalid large octal literal with numeric separators
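Previously, the error path for an over-long literal freed the underscore-stripped copy of the number and, outside parser mode, fell through to free it a second time. To clean up the error handling, check for invalid octal digits (8 and 9) with an explicit loop before the copy is made, instead of mixing this check into the string-to-number conversion. Also clean up some type usage (size_t for lengths, zend_bool for the underscore flag).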
1 parent ab4f725 commit 3f76f94

File tree

2 files changed

+60
-48
lines changed

2 files changed

+60
-48
lines changed

Zend/zend_language_scanner.l

Lines changed: 29 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ do { \
120120

121121
BEGIN_EXTERN_C()
122122

123-
static void strip_underscores(char *str, int *len)
123+
static void strip_underscores(char *str, size_t *len)
124124
{
125125
char *src = str, *dest = str;
126126
while (*src != '\0') {
@@ -1771,8 +1771,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
17711771

17721772
<ST_IN_SCRIPTING>{BNUM} {
17731773
/* The +/- 2 skips "0b" */
1774-
int len = yyleng - 2, contains_underscores;
1774+
size_t len = yyleng - 2;
17751775
char *end, *bin = yytext + 2;
1776+
zend_bool contains_underscores;
17761777

17771778
/* Skip any leading 0s */
17781779
while (len > 0 && (*bin == '0' || *bin == '_')) {
@@ -1811,10 +1812,25 @@ NEWLINE ("\r"|"\n"|"\r\n")
18111812
}
18121813

18131814
<ST_IN_SCRIPTING>{LNUM} {
1814-
int len = yyleng, contains_underscores;
1815+
size_t len = yyleng;
18151816
char *end, *lnum = yytext;
1816-
1817-
contains_underscores = (memchr(lnum, '_', len) != NULL);
1817+
zend_bool is_octal = lnum[0] == '0';
1818+
zend_bool contains_underscores = (memchr(lnum, '_', len) != NULL);
1819+
1820+
/* Digits 8 and 9 are illegal in octal literals. */
1821+
if (is_octal) {
1822+
size_t i;
1823+
for (i = 0; i < len; i++) {
1824+
if (lnum[i] == '8' || lnum[i] == '9') {
1825+
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1826+
ZVAL_UNDEF(zendlval);
1827+
if (PARSER_MODE()) {
1828+
RETURN_TOKEN(T_ERROR);
1829+
}
1830+
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1831+
}
1832+
}
1833+
}
18181834

18191835
if (contains_underscores) {
18201836
lnum = estrndup(lnum, len);
@@ -1824,21 +1840,8 @@ NEWLINE ("\r"|"\n"|"\r\n")
18241840
if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
18251841
errno = 0;
18261842
/* base must be passed explicitly for correct parse error on Windows */
1827-
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
1828-
/* This isn't an assert, we need to ensure 019 isn't valid octal
1829-
* Because the lexing itself doesn't do that for us
1830-
*/
1831-
if (end != lnum + len) {
1832-
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1833-
ZVAL_UNDEF(zendlval);
1834-
if (contains_underscores) {
1835-
efree(lnum);
1836-
}
1837-
if (PARSER_MODE()) {
1838-
RETURN_TOKEN(T_ERROR);
1839-
}
1840-
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
1841-
}
1843+
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, is_octal ? 8 : 10));
1844+
ZEND_ASSERT(end == lnum + len);
18421845
} else {
18431846
errno = 0;
18441847
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
@@ -1849,35 +1852,13 @@ NEWLINE ("\r"|"\n"|"\r\n")
18491852
} else {
18501853
ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
18511854
}
1852-
/* Also not an assert for the same reason */
1853-
if (end != lnum + len) {
1854-
zend_throw_exception(zend_ce_parse_error,
1855-
"Invalid numeric literal", 0);
1856-
ZVAL_UNDEF(zendlval);
1857-
if (contains_underscores) {
1858-
efree(lnum);
1859-
}
1860-
if (PARSER_MODE()) {
1861-
RETURN_TOKEN(T_ERROR);
1862-
}
1863-
}
1855+
ZEND_ASSERT(end == lnum + len);
18641856
if (contains_underscores) {
18651857
efree(lnum);
18661858
}
18671859
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
18681860
}
1869-
/* Also not an assert for the same reason */
1870-
if (end != lnum + len) {
1871-
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
1872-
ZVAL_UNDEF(zendlval);
1873-
if (contains_underscores) {
1874-
efree(lnum);
1875-
}
1876-
if (PARSER_MODE()) {
1877-
RETURN_TOKEN(T_ERROR);
1878-
}
1879-
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
1880-
}
1861+
ZEND_ASSERT(end == lnum + len);
18811862
}
18821863
ZEND_ASSERT(!errno);
18831864
if (contains_underscores) {
@@ -1888,8 +1869,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
18881869

18891870
<ST_IN_SCRIPTING>{HNUM} {
18901871
/* The +/- 2 skips "0x" */
1891-
int len = yyleng - 2, contains_underscores;
1872+
size_t len = yyleng - 2;
18921873
char *end, *hex = yytext + 2;
1874+
zend_bool contains_underscores;
18931875

18941876
/* Skip any leading 0s */
18951877
while (len > 0 && (*hex == '0' || *hex == '_')) {
@@ -1954,10 +1936,9 @@ string:
19541936

19551937
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
19561938
const char *end;
1957-
int len = yyleng, contains_underscores;
1939+
size_t len = yyleng;
19581940
char *dnum = yytext;
1959-
1960-
contains_underscores = (memchr(dnum, '_', len) != NULL);
1941+
zend_bool contains_underscores = (memchr(dnum, '_', len) != NULL);
19611942

19621943
if (contains_underscores) {
19631944
dnum = estrndup(dnum, len);
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
--TEST--
2+
Large invalid octal number with underscores
3+
--FILE--
4+
<?php
5+
6+
var_dump(token_get_all("<?php 0_10000000000000000000009;"));
7+
8+
?>
9+
--EXPECTF--
10+
array(3) {
11+
[0]=>
12+
array(3) {
13+
[0]=>
14+
int(%d)
15+
[1]=>
16+
string(6) "<?php "
17+
[2]=>
18+
int(1)
19+
}
20+
[1]=>
21+
array(3) {
22+
[0]=>
23+
int(%d)
24+
[1]=>
25+
string(25) "0_10000000000000000000009"
26+
[2]=>
27+
int(1)
28+
}
29+
[2]=>
30+
string(1) ";"
31+
}

0 commit comments

Comments
 (0)