Skip to content

Commit 51a2ce5

Browse files
committed
zend_atoi/zend_atol cleanup
zend_ato[il]() don't *just* do number parsing. They also check for a 'K', 'M', or 'G' at the end of the string, and multiply the parsed value out accordingly. Unfortunately, they ignore any other non-numerics between the numeric component and the last character in the string. This means that numbers such as the following are both valid and non-intuitive in their final output. * "123KMG" is interpreted as "123G" -> 132070244352 * "123G " is interpreted as "123 " -> 123 * "123GB" is interpreted as "123B" -> 123 * "123 I like tacos." is also interpreted as "123." -> 123 This diff primarily adds warnings for these cases when the output would be a potentially unexpected, and unhelpful value. Additionally, several places in php-src use these functions despite not actually wanting their KMG behavior such as session.upload_progress.freq which will happily parse "1 banana" as a valid value. For these settings, I've switched them to ZEND_STRTOL which preserves their existing /intended/ behavior. * It won't respect KMG suffixes, but they never really wanted that logic anyway. * It will ignore non-numeric suffixes so as to not introduce new failures. We should probably reexamine that second bullet point separately. Lastly, with these changes, zend_atoi() is no longer used in php-src, but I left it as a valid API for 3rd party extensions. Note that I did make it a proxy to zend_atol() since deferring the truncation till the end is effectively the same as truncation during multiplication, but this avoid code duplication. I think we should consider removing zend_atoi() entirely (perhaps in 8.0?) and rename zend_atol() to something reflecting it's KMG specific behavior.
1 parent fc42ac2 commit 51a2ce5

File tree

6 files changed

+64
-48
lines changed

6 files changed

+64
-48
lines changed

Zend/zend.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ static ZEND_INI_MH(OnUpdateAssertions) /* {{{ */
149149

150150
p = (zend_long *) (base+(size_t) mh_arg1);
151151

152-
val = zend_atol(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
152+
val = ZEND_STRTOL(ZSTR_VAL(new_value), NULL, 0);
153153

154154
if (stage != ZEND_INI_STAGE_STARTUP &&
155155
stage != ZEND_INI_STAGE_SHUTDOWN &&
@@ -828,7 +828,7 @@ int zend_startup(zend_utility_functions *utility_functions) /* {{{ */
828828
{
829829
char *tmp = getenv("USE_ZEND_DTRACE");
830830

831-
if (tmp && zend_atoi(tmp, 0)) {
831+
if (tmp && ZEND_STRTOL(tmp, NULL, 0)) {
832832
zend_dtrace_enabled = 1;
833833
zend_compile_file = dtrace_compile_file;
834834
zend_execute_ex = dtrace_execute_ex;

Zend/zend_alloc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2705,7 +2705,7 @@ static void alloc_globals_ctor(zend_alloc_globals *alloc_globals)
27052705

27062706
#if ZEND_MM_CUSTOM
27072707
tmp = getenv("USE_ZEND_ALLOC");
2708-
if (tmp && !zend_atoi(tmp, 0)) {
2708+
if (tmp && !ZEND_STRTOL(tmp, NULL, 0)) {
27092709
alloc_globals->mm_heap = malloc(sizeof(zend_mm_heap));
27102710
memset(alloc_globals->mm_heap, 0, sizeof(zend_mm_heap));
27112711
alloc_globals->mm_heap->use_custom_heap = ZEND_MM_CUSTOM_HEAP_STD;
@@ -2717,7 +2717,7 @@ static void alloc_globals_ctor(zend_alloc_globals *alloc_globals)
27172717
#endif
27182718

27192719
tmp = getenv("USE_ZEND_ALLOC_HUGE_PAGES");
2720-
if (tmp && zend_atoi(tmp, 0)) {
2720+
if (tmp && ZEND_STRTOL(tmp, NULL, 0)) {
27212721
zend_mm_use_huge_pages = 1;
27222722
}
27232723
alloc_globals->mm_heap = zend_mm_init();

Zend/zend_operators.c

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ static _locale_t current_locale = NULL;
3939
#define zend_tolower(c) tolower(c)
4040
#endif
4141

42+
#define ZEND_IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r') || ((c) == '\v') || ((c) == '\f'))
43+
4244
#define TYPE_PAIR(t1,t2) (((t1) << 4) | (t2))
4345

4446
static const unsigned char tolower_map[256] = {
@@ -78,58 +80,72 @@ static const unsigned char tolower_map[256] = {
7880
zend_binary_strncasecmp
7981
*/
8082

81-
ZEND_API int ZEND_FASTCALL zend_atoi(const char *str, size_t str_len) /* {{{ */
83+
static void zend_atol_warning(const char *str, size_t str_len, size_t num_len, char suffix) /* {{{ */
8284
{
83-
int retval;
85+
/* Duplicate just in case str[str_len] != 0 */
86+
zend_string *strcopy = zend_string_init(str, str_len, 0);
87+
zend_string *numonly = zend_string_init(str, num_len, 0);
88+
const char suffixstr[2] = { suffix, '\0' };
8489

85-
if (!str_len) {
86-
str_len = strlen(str);
87-
}
88-
retval = ZEND_STRTOL(str, NULL, 0);
89-
if (str_len>0) {
90-
switch (str[str_len-1]) {
91-
case 'g':
92-
case 'G':
93-
retval *= 1024;
94-
/* break intentionally missing */
95-
case 'm':
96-
case 'M':
97-
retval *= 1024;
98-
/* break intentionally missing */
99-
case 'k':
100-
case 'K':
101-
retval *= 1024;
102-
break;
103-
}
104-
}
105-
return retval;
90+
zend_error(E_WARNING, "Invalid numeric string '%s', interpreting as '%s%s'", ZSTR_VAL(strcopy), ZSTR_VAL(numonly), suffixstr);
91+
zend_string_release(numonly);
92+
zend_string_release(strcopy);
93+
}
94+
/* }}} */
95+
96+
97+
ZEND_API int ZEND_FASTCALL zend_atoi(const char *str, size_t str_len) /* {{{ */
98+
{
99+
return (int)zend_atol(str, str_len);
106100
}
107101
/* }}} */
108102

109103
ZEND_API zend_long ZEND_FASTCALL zend_atol(const char *str, size_t str_len) /* {{{ */
110104
{
111105
zend_long retval;
106+
char *end = NULL;
112107

113108
if (!str_len) {
114109
str_len = strlen(str);
115110
}
116-
retval = ZEND_STRTOL(str, NULL, 0);
117-
if (str_len>0) {
118-
switch (str[str_len-1]) {
119-
case 'g':
120-
case 'G':
121-
retval *= 1024;
122-
/* break intentionally missing */
123-
case 'm':
124-
case 'M':
125-
retval *= 1024;
126-
/* break intentionally missing */
127-
case 'k':
128-
case 'K':
129-
retval *= 1024;
130-
break;
131-
}
111+
112+
/* Ignore trailing whitespace */
113+
while (str_len && ZEND_IS_WHITESPACE(str[str_len-1])) --str_len;
114+
if (!str_len) return 0;
115+
116+
/* Parse integral portion */
117+
retval = ZEND_STRTOL(str, &end, 0);
118+
119+
/* Allow for whitespace between integer portion and any suffix character */
120+
while (ZEND_IS_WHITESPACE(*end)) ++end;
121+
122+
/* No exponent suffix. */
123+
if (!*end) return retval;
124+
125+
switch (str[str_len-1]) {
126+
case 'g':
127+
case 'G':
128+
retval *= 1024;
129+
/* break intentionally missing */
130+
case 'm':
131+
case 'M':
132+
retval *= 1024;
133+
/* break intentionally missing */
134+
case 'k':
135+
case 'K':
136+
retval *= 1024;
137+
break;
138+
default:
139+
/* Unknown suffix */
140+
zend_atol_warning(str, str_len, end - str, 0);
141+
return retval;
132142
}
143+
144+
if (end < (str + str_len - 1)) {
145+
/* More than one character in suffix */
146+
zend_atol_warning(str, str_len, end - str, str[str_len-1]);
147+
}
148+
133149
return retval;
134150
}
135151
/* }}} */
@@ -3007,7 +3023,7 @@ ZEND_API zend_uchar ZEND_FASTCALL _is_numeric_string_ex(const char *str, size_t
30073023

30083024
/* Skip any whitespace
30093025
* This is much faster than the isspace() function */
3010-
while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
3026+
while (ZEND_IS_WHITESPACE(*str)) {
30113027
str++;
30123028
length--;
30133029
}

ext/session/session.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -768,8 +768,8 @@ static PHP_INI_MH(OnUpdateLazyWrite) /* {{{ */
768768

769769
static PHP_INI_MH(OnUpdateRfc1867Freq) /* {{{ */
770770
{
771-
int tmp;
772-
tmp = zend_atoi(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
771+
zend_long tmp = ZEND_STRTOL(ZSTR_VAL(new_value), NULL, 0);
772+
773773
if(tmp < 0) {
774774
php_error_docref(NULL, E_WARNING, "session.upload_progress.freq must be greater than or equal to zero");
775775
return FAILURE;

ext/standard/basic_functions.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5994,7 +5994,7 @@ static void php_simple_ini_parser_cb(zval *arg1, zval *arg2, zval *arg3, int cal
59945994
}
59955995

59965996
if (!(Z_STRLEN_P(arg1) > 1 && Z_STRVAL_P(arg1)[0] == '0') && is_numeric_string(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1), NULL, NULL, 0) == IS_LONG) {
5997-
zend_ulong key = (zend_ulong) zend_atol(Z_STRVAL_P(arg1), Z_STRLEN_P(arg1));
5997+
zend_ulong key = ZEND_STRTOUL(Z_STRVAL_P(arg1), NULL, 0);
59985998
if ((find_hash = zend_hash_index_find(Z_ARRVAL_P(arr), key)) == NULL) {
59995999
array_init(&hash);
60006000
find_hash = zend_hash_index_add_new(Z_ARRVAL_P(arr), key, &hash);

ext/zlib/zlib.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1436,7 +1436,7 @@ static PHP_INI_MH(OnUpdate_zlib_output_compression)
14361436
} else if (!strncasecmp(ZSTR_VAL(new_value), "on", sizeof("on"))) {
14371437
int_value = 1;
14381438
} else {
1439-
int_value = zend_atoi(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
1439+
int_value = ZEND_STRTOL(ZSTR_VAL(new_value), NULL, 0);
14401440
}
14411441
ini_value = zend_ini_string("output_handler", sizeof("output_handler"), 0);
14421442

0 commit comments

Comments
 (0)