Skip to content

Commit 5d0bcd8

Browse files
committed
zend_atoi/zend_atol cleanup
zend_ato[il]() don't just do number parsing. They also check for a 'K', 'M', or 'G' at the end of the string, and multiply the parsed value out accordingly. Unfortunately, they ignore any other non-numerics between the numeric component and the last character in the string. This means that numbers such as the following are both valid and non-intuitive in their final output. * "123KMG" is interpreted as "123G" -> 132070244352 * "123G " is interpreted as "123 " -> 123 * "123GB" is interpreted as "123B" -> 123 * "123 I like tacos." is also interpreted as "123." -> 123 This diff primarily adds warnings for these cases when the output would be a potentially unexpected, and unhelpful value. Additionally, with these changes, zend_atoi() is no longer used in php-src, but I left it as a valid API for 3rd party extensions. Note that I did make it a proxy to zend_atol() since deferring the truncation till the end is effectively the same as truncation during multiplication, but this avoid code duplication. I think we should consider removing zend_atoi() entirely (perhaps in 8.0?) and rename zend_atol() to something reflecting it's KMG specific behavior.
1 parent ef4f6a3 commit 5d0bcd8

File tree

3 files changed

+270
-40
lines changed

3 files changed

+270
-40
lines changed

Zend/tests/zend_atol.phpt

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
--TEST--
2+
Test parsing of INI "size" values (e.g. "16M")
3+
--FILE--
4+
<?php
5+
6+
// This test checks valid formats do not throw any warnings.
7+
foreach (['', ' '] as $leadingWS) {
8+
foreach (['', '+', '-'] as $sign) {
9+
foreach (['', ' '] as $midWS) {
10+
foreach (['', 'K', 'k', 'M', 'm', 'G', 'g'] as $exp) {
11+
foreach (['', ' '] as $trailingWS) {
12+
$setting = sprintf('%s%s1%s%s%s',
13+
$leadingWS, $sign, $midWS, $exp, $trailingWS);
14+
var_dump($setting);
15+
16+
// Technically, negative values don't make sense for socket timout,
17+
// but it doesn't validate that so use it anyway.
18+
ini_set('default_socket_timeout', $setting);
19+
}
20+
}
21+
}
22+
}
23+
}
24+
25+
--EXPECT--
26+
string(1) "1"
27+
string(2) "1 "
28+
string(2) "1K"
29+
string(3) "1K "
30+
string(2) "1k"
31+
string(3) "1k "
32+
string(2) "1M"
33+
string(3) "1M "
34+
string(2) "1m"
35+
string(3) "1m "
36+
string(2) "1G"
37+
string(3) "1G "
38+
string(2) "1g"
39+
string(3) "1g "
40+
string(2) "1 "
41+
string(3) "1 "
42+
string(3) "1 K"
43+
string(4) "1 K "
44+
string(3) "1 k"
45+
string(4) "1 k "
46+
string(3) "1 M"
47+
string(4) "1 M "
48+
string(3) "1 m"
49+
string(4) "1 m "
50+
string(3) "1 G"
51+
string(4) "1 G "
52+
string(3) "1 g"
53+
string(4) "1 g "
54+
string(2) "+1"
55+
string(3) "+1 "
56+
string(3) "+1K"
57+
string(4) "+1K "
58+
string(3) "+1k"
59+
string(4) "+1k "
60+
string(3) "+1M"
61+
string(4) "+1M "
62+
string(3) "+1m"
63+
string(4) "+1m "
64+
string(3) "+1G"
65+
string(4) "+1G "
66+
string(3) "+1g"
67+
string(4) "+1g "
68+
string(3) "+1 "
69+
string(4) "+1 "
70+
string(4) "+1 K"
71+
string(5) "+1 K "
72+
string(4) "+1 k"
73+
string(5) "+1 k "
74+
string(4) "+1 M"
75+
string(5) "+1 M "
76+
string(4) "+1 m"
77+
string(5) "+1 m "
78+
string(4) "+1 G"
79+
string(5) "+1 G "
80+
string(4) "+1 g"
81+
string(5) "+1 g "
82+
string(2) "-1"
83+
string(3) "-1 "
84+
string(3) "-1K"
85+
string(4) "-1K "
86+
string(3) "-1k"
87+
string(4) "-1k "
88+
string(3) "-1M"
89+
string(4) "-1M "
90+
string(3) "-1m"
91+
string(4) "-1m "
92+
string(3) "-1G"
93+
string(4) "-1G "
94+
string(3) "-1g"
95+
string(4) "-1g "
96+
string(3) "-1 "
97+
string(4) "-1 "
98+
string(4) "-1 K"
99+
string(5) "-1 K "
100+
string(4) "-1 k"
101+
string(5) "-1 k "
102+
string(4) "-1 M"
103+
string(5) "-1 M "
104+
string(4) "-1 m"
105+
string(5) "-1 m "
106+
string(4) "-1 G"
107+
string(5) "-1 G "
108+
string(4) "-1 g"
109+
string(5) "-1 g "
110+
string(2) " 1"
111+
string(3) " 1 "
112+
string(3) " 1K"
113+
string(4) " 1K "
114+
string(3) " 1k"
115+
string(4) " 1k "
116+
string(3) " 1M"
117+
string(4) " 1M "
118+
string(3) " 1m"
119+
string(4) " 1m "
120+
string(3) " 1G"
121+
string(4) " 1G "
122+
string(3) " 1g"
123+
string(4) " 1g "
124+
string(3) " 1 "
125+
string(4) " 1 "
126+
string(4) " 1 K"
127+
string(5) " 1 K "
128+
string(4) " 1 k"
129+
string(5) " 1 k "
130+
string(4) " 1 M"
131+
string(5) " 1 M "
132+
string(4) " 1 m"
133+
string(5) " 1 m "
134+
string(4) " 1 G"
135+
string(5) " 1 G "
136+
string(4) " 1 g"
137+
string(5) " 1 g "
138+
string(3) " +1"
139+
string(4) " +1 "
140+
string(4) " +1K"
141+
string(5) " +1K "
142+
string(4) " +1k"
143+
string(5) " +1k "
144+
string(4) " +1M"
145+
string(5) " +1M "
146+
string(4) " +1m"
147+
string(5) " +1m "
148+
string(4) " +1G"
149+
string(5) " +1G "
150+
string(4) " +1g"
151+
string(5) " +1g "
152+
string(4) " +1 "
153+
string(5) " +1 "
154+
string(5) " +1 K"
155+
string(6) " +1 K "
156+
string(5) " +1 k"
157+
string(6) " +1 k "
158+
string(5) " +1 M"
159+
string(6) " +1 M "
160+
string(5) " +1 m"
161+
string(6) " +1 m "
162+
string(5) " +1 G"
163+
string(6) " +1 G "
164+
string(5) " +1 g"
165+
string(6) " +1 g "
166+
string(3) " -1"
167+
string(4) " -1 "
168+
string(4) " -1K"
169+
string(5) " -1K "
170+
string(4) " -1k"
171+
string(5) " -1k "
172+
string(4) " -1M"
173+
string(5) " -1M "
174+
string(4) " -1m"
175+
string(5) " -1m "
176+
string(4) " -1G"
177+
string(5) " -1G "
178+
string(4) " -1g"
179+
string(5) " -1g "
180+
string(4) " -1 "
181+
string(5) " -1 "
182+
string(5) " -1 K"
183+
string(6) " -1 K "
184+
string(5) " -1 k"
185+
string(6) " -1 k "
186+
string(5) " -1 M"
187+
string(6) " -1 M "
188+
string(5) " -1 m"
189+
string(6) " -1 m "
190+
string(5) " -1 G"
191+
string(6) " -1 G "
192+
string(5) " -1 g"
193+
string(6) " -1 g "

Zend/tests/zend_atol_error.phpt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
--TEST--
2+
Test parsing failures of INI "size" values (e.g. "16M")
3+
--FILE--
4+
<?php
5+
6+
// This test checks invalid formats do throw warnings.
7+
8+
$tests = [
9+
'K', # No digits
10+
'1KM', # Multiple multipliers.
11+
'1X', # Unknown multiplier.
12+
'1.0K', # Non integral digits.
13+
];
14+
15+
foreach ($tests as $setting) {
16+
ini_set('default_socket_timeout', $setting);
17+
}
18+
19+
--EXPECTF--
20+
21+
Warning: Invalid numeric string 'K', no valid leading digits, interpreting as '0' in %s/zend_atol_error.php on line %d
22+
23+
Warning: Invalid numeric string '1KM', interpreting as '1M' in %s/zend_atol_error.php on line %d
24+
25+
Warning: Invalid numeric string '1X', interpreting as '1' in %s/zend_atol_error.php on line %d
26+
27+
Warning: Invalid numeric string '1.0K', interpreting as '1K' in %s/zend_atol_error.php on line %d

Zend/zend_operators.c

Lines changed: 50 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ static _locale_t current_locale = NULL;
3939
#define zend_tolower(c) tolower(c)
4040
#endif
4141

42+
#define ZEND_IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r') || ((c) == '\v') || ((c) == '\f'))
43+
4244
#define TYPE_PAIR(t1,t2) (((t1) << 4) | (t2))
4345

4446
static const unsigned char tolower_map[256] = {
@@ -80,56 +82,64 @@ static const unsigned char tolower_map[256] = {
8082

8183
ZEND_API int ZEND_FASTCALL zend_atoi(const char *str, size_t str_len) /* {{{ */
8284
{
83-
int retval;
84-
85-
if (!str_len) {
86-
str_len = strlen(str);
87-
}
88-
retval = ZEND_STRTOL(str, NULL, 0);
89-
if (str_len>0) {
90-
switch (str[str_len-1]) {
91-
case 'g':
92-
case 'G':
93-
retval *= 1024;
94-
/* break intentionally missing */
95-
case 'm':
96-
case 'M':
97-
retval *= 1024;
98-
/* break intentionally missing */
99-
case 'k':
100-
case 'K':
101-
retval *= 1024;
102-
break;
103-
}
104-
}
105-
return retval;
85+
return (int)zend_atol(str, str_len);
10686
}
10787
/* }}} */
10888

10989
ZEND_API zend_long ZEND_FASTCALL zend_atol(const char *str, size_t str_len) /* {{{ */
11090
{
11191
zend_long retval;
92+
char *end = NULL;
11293

11394
if (!str_len) {
11495
str_len = strlen(str);
11596
}
116-
retval = ZEND_STRTOL(str, NULL, 0);
117-
if (str_len>0) {
118-
switch (str[str_len-1]) {
119-
case 'g':
120-
case 'G':
121-
retval *= 1024;
122-
/* break intentionally missing */
123-
case 'm':
124-
case 'M':
125-
retval *= 1024;
126-
/* break intentionally missing */
127-
case 'k':
128-
case 'K':
129-
retval *= 1024;
130-
break;
131-
}
97+
98+
/* Ignore trailing whitespace */
99+
while (str_len && ZEND_IS_WHITESPACE(str[str_len-1])) --str_len;
100+
if (!str_len) return 0;
101+
102+
/* Parse integral portion */
103+
retval = ZEND_STRTOL(str, &end, 0);
104+
105+
if (!(end - str)) {
106+
zend_error(E_WARNING, "Invalid numeric string '%.*s', no valid leading digits, interpreting as '0'",
107+
(int)str_len, str);
108+
return 0;
132109
}
110+
111+
/* Allow for whitespace between integer portion and any suffix character */
112+
while (ZEND_IS_WHITESPACE(*end)) ++end;
113+
114+
/* No exponent suffix. */
115+
if (!*end) return retval;
116+
117+
switch (str[str_len-1]) {
118+
case 'g':
119+
case 'G':
120+
retval *= 1024;
121+
/* break intentionally missing */
122+
case 'm':
123+
case 'M':
124+
retval *= 1024;
125+
/* break intentionally missing */
126+
case 'k':
127+
case 'K':
128+
retval *= 1024;
129+
break;
130+
default:
131+
/* Unknown suffix */
132+
zend_error(E_WARNING, "Invalid numeric string '%.*s', interpreting as '%.*s'",
133+
(int)str_len, str, (int)(end - str), str);
134+
return retval;
135+
}
136+
137+
if (end < (str + str_len - 1)) {
138+
/* More than one character in suffix */
139+
zend_error(E_WARNING, "Invalid numeric string '%.*s', interpreting as '%.*s%c'",
140+
(int)str_len, str, (int)(end - str), str, str[str_len-1]);
141+
}
142+
133143
return retval;
134144
}
135145
/* }}} */
@@ -3007,7 +3017,7 @@ ZEND_API zend_uchar ZEND_FASTCALL _is_numeric_string_ex(const char *str, size_t
30073017

30083018
/* Skip any whitespace
30093019
* This is much faster than the isspace() function */
3010-
while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
3020+
while (ZEND_IS_WHITESPACE(*str)) {
30113021
str++;
30123022
length--;
30133023
}

0 commit comments

Comments
 (0)