Skip to content

Commit 9768c2f

Browse files
committed
PHP 7.3: Make stream wrapper and windows drive checks locale-independent
- Avoid registering/detecting stream wrappers in locale-dependent ways.
- Avoid locale dependence for Windows drive letter names in zend_virtual_cwd.
- Make parse_url stop depending on locale. Related to https://bugs.php.net/bug.php?id=52923 - iscntrl is locale-dependent, which seems to corrupt certain bytes.
- Make FILTER_VALIDATE_DOMAIN with flag FILTER_FLAG_HOSTNAME locale-independent.
Somewhat related to https://wiki.php.net/rfc/strtolower-ascii, but I don't think most of these should have been locale-dependent in the first place - the code may not have considered locales. E.g. on Linux, after `setlocale(LC_ALL, 'de_DE');` (if the locale is installed and the call succeeds), some byte values in the range 128-255 are classified as alpha/cntrl, whereas the C locale classifies no bytes in that range that way. To avoid this locale dependence in older PHP versions, applications can call `setlocale(LC_CTYPE, 'C')`. Use `zend_bool` since the switch to `bool` was not made in 7.3.
1 parent 7d92153 commit 9768c2f

File tree

11 files changed

+128
-12
lines changed

11 files changed

+128
-12
lines changed

Zend/zend_compile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1849,7 +1849,7 @@ ZEND_API size_t zend_dirname(char *path, size_t len)
18491849
/* Note that on Win32 CWD is per drive (heritage from CP/M).
18501850
* This means dirname("c:foo") maps to "c:." or "c:" - which means CWD on C: drive.
18511851
*/
1852-
if ((2 <= len) && isalpha((int)((unsigned char *)path)[0]) && (':' == path[1])) {
1852+
if ((2 <= len) && zend_isalpha_ascii((int)((unsigned char *)path)[0]) && (':' == path[1])) {
18531853
/* Skip over the drive spec (if any) so as not to change */
18541854
path += 2;
18551855
len_adjust += 2;

Zend/zend_operators.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,47 @@ static const unsigned char tolower_map[256] = {
6262

6363
#define zend_tolower_ascii(c) (tolower_map[(unsigned char)(c)])
6464

65+
/* ctype's isalpha varies based on locale, which is not what we want for many use cases.
66+
* This is what it'd be in the "C" locale. */
67+
ZEND_API const zend_bool zend_isalpha_map[256] = {
68+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
69+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
70+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
71+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
72+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
73+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
74+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
75+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
76+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
77+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
78+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
79+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
81+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
82+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
83+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
84+
};
85+
86+
/* ctype's isalnum in the "C" locale: isalpha (A-Z, a-z) plus isdigit (0-9). */
87+
ZEND_API const zend_bool zend_isalnum_map[256] = {
88+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
89+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
90+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
91+
1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
92+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
93+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
94+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
95+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
96+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
97+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
98+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
99+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
100+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
101+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
102+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
103+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
104+
};
105+
65106
/**
66107
* Functions using locale lowercase:
67108
zend_binary_strncasecmp_l

Zend/zend_operators.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,13 @@ ZEND_API int ZEND_FASTCALL string_case_compare_function(zval *op1, zval *op2);
365365
ZEND_API int ZEND_FASTCALL string_locale_compare_function(zval *op1, zval *op2);
366366
#endif
367367

368+
/* NOTE: The locale-independent alternatives to ctype(isalpha/isalnum) were added to fix bugs in php 7.3 patch releases, and should not be used externally until php 8.2 */
369+
ZEND_API extern const zend_bool zend_isalpha_map[256];
370+
ZEND_API extern const zend_bool zend_isalnum_map[256];
371+
372+
#define zend_isalpha_ascii(c) (zend_isalpha_map[(unsigned char)(c)])
373+
#define zend_isalnum_ascii(c) (zend_isalnum_map[(unsigned char)(c)])
374+
368375
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length);
369376
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length);
370377
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length);

Zend/zend_virtual_cwd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ typedef unsigned short mode_t;
6969
#define IS_UNC_PATH(path, len) \
7070
(len >= 2 && IS_SLASH(path[0]) && IS_SLASH(path[1]))
7171
#define IS_ABSOLUTE_PATH(path, len) \
72-
(len >= 2 && (/* is local */isalpha(path[0]) && path[1] == ':' || /* is UNC */IS_SLASH(path[0]) && IS_SLASH(path[1])))
72+
(len >= 2 && (/* is local */zend_isalpha_ascii(path[0]) && path[1] == ':' || /* is UNC */IS_SLASH(path[0]) && IS_SLASH(path[1])))
7373

7474
#else
7575
#ifdef HAVE_DIRENT_H

ext/filter/logical_filters.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -496,21 +496,21 @@ static int _php_filter_validate_domain(char * domain, int len, zend_long flags)
496496
}
497497

498498
/* First char must be alphanumeric */
499-
if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
499+
if(*s == '.' || (hostname && !zend_isalnum_ascii((int)*(unsigned char *)s))) {
500500
return 0;
501501
}
502502

503503
while (s < e) {
504504
if (*s == '.') {
505505
/* The first and the last character of a label must be alphanumeric */
506-
if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
506+
if (*(s + 1) == '.' || (hostname && (!zend_isalnum_ascii((int)*(unsigned char *)(s - 1)) || !zend_isalnum_ascii((int)*(unsigned char *)(s + 1))))) {
507507
return 0;
508508
}
509509

510510
/* Reset label length counter */
511511
i = 1;
512512
} else {
513-
if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
513+
if (i > 63 || (hostname && *s != '-' && !zend_isalnum_ascii((int)*(unsigned char *)s))) {
514514
return 0;
515515
}
516516

@@ -537,7 +537,7 @@ static int is_userinfo_valid(zend_string *str)
537537
const char *valid = "-._~!$&'()*+,;=:";
538538
const char *p = ZSTR_VAL(str);
539539
while (p - ZSTR_VAL(str) < ZSTR_LEN(str)) {
540-
if (isalpha(*p) || isdigit(*p) || strchr(valid, *p)) {
540+
if (zend_isalnum_ascii(*p) || strchr(valid, *p)) {
541541
p++;
542542
} else if (*p == '%' && p - ZSTR_VAL(str) <= ZSTR_LEN(str) - 3 && isdigit(*(p+1)) && isxdigit(*(p+2))) {
543543
p += 3;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
--TEST--
2+
FILTER_VALIDATE_DOMAIN FILTER_FLAG_HOSTNAME should not be locale dependent
3+
--EXTENSIONS--
4+
filter
5+
--SKIPIF--
6+
<?php // try to activate a single-byte german locale
7+
if (!setlocale(LC_ALL, "de_DE")) {
8+
print "skip Can't find german locale";
9+
}
10+
?>
11+
--FILE--
12+
<?php
13+
var_dump(filter_var('٪', FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME));
14+
setlocale(LC_ALL, "de_DE");
15+
var_dump(filter_var('٪', FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME));
16+
?>
17+
--EXPECT--
18+
bool(false)
19+
bool(false)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
--TEST--
2+
Stream wrappers should not be locale dependent
3+
--SKIPIF--
4+
<?php // try to activate a single-byte german locale
5+
if (!setlocale(LC_ALL, "de_DE")) {
6+
print "skip Can't find german locale";
7+
}
8+
?>
9+
--INI--
10+
allow_url_fopen=1
11+
display_errors=stderr
12+
--FILE--
13+
<?php
14+
setlocale(LC_ALL, "de_DE");
15+
class testwrapper {
16+
}
17+
18+
var_dump(ctype_alpha('٪')); // \xd9 and \xaa are both alphabetical in the german locale
19+
var_dump(stream_wrapper_register("test٪", 'testwrapper', STREAM_IS_URL));
20+
21+
echo 'stream_open: ';
22+
fopen("test٪://test", 'r');
23+
?>
24+
--EXPECTF--
25+
bool(true)
26+
Warning: stream_wrapper_register(): Invalid protocol scheme specified. Unable to register wrapper class testwrapper to test٪:// in %s on line 7
27+
bool(false)
28+
stream_open: Warning: fopen(test٪://test): Failed to open stream: No such file or directory in %s on line 10
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
--TEST--
2+
Bug #52923 (Locale settings affecting parse_url)
3+
--SKIPIF--
4+
<?php // try to activate a german locale
5+
if (!setlocale(LC_ALL, "de_DE")) {
6+
print "skip Can't find german locale";
7+
}
8+
?>
9+
--FILE--
10+
<?php
11+
echo http_build_query(parse_url("http\xfc://invalid"), true), "\n";
12+
// activate the german locale. With this bug fix, locale settings should no longer affect parse_url
13+
var_dump(setlocale(LC_CTYPE, "de_DE"));
14+
echo http_build_query(parse_url("http\xfc://invalid"), true), "\n";
15+
echo http_build_query(parse_url('http://mydomain.com/path/道')), "\n";
16+
?>
17+
--EXPECT--
18+
path=http%FC%3A%2F%2Finvalid
19+
string(5) "de_DE"
20+
path=http%FC%3A%2F%2Finvalid
21+
scheme=http&host=mydomain.com&path=%2Fpath%2F%E9%81%93

main/fopen_wrappers.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ PHPAPI zend_string *php_resolve_path(const char *filename, size_t filename_lengt
484484
}
485485

486486
/* Don't resolve paths which contain protocol (except of file://) */
487-
for (p = filename; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
487+
for (p = filename; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
488488
if ((*p == ':') && (p - filename > 1) && (p[1] == '/') && (p[2] == '/')) {
489489
wrapper = php_stream_locate_url_wrapper(filename, &actual_path, STREAM_OPEN_FOR_INCLUDE);
490490
if (wrapper == &php_plain_files_wrapper) {
@@ -520,7 +520,7 @@ PHPAPI zend_string *php_resolve_path(const char *filename, size_t filename_lengt
520520
/* Check for stream wrapper */
521521
int is_stream_wrapper = 0;
522522

523-
for (p = ptr; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
523+
for (p = ptr; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
524524
if ((*p == ':') && (p - ptr > 1) && (p[1] == '/') && (p[2] == '/')) {
525525
/* .:// or ..:// is not a stream wrapper */
526526
if (p[-1] != '.' || p[-2] != '.' || p - 2 != ptr) {
@@ -586,7 +586,7 @@ PHPAPI zend_string *php_resolve_path(const char *filename, size_t filename_lengt
586586
actual_path = trypath;
587587

588588
/* Check for stream wrapper */
589-
for (p = trypath; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
589+
for (p = trypath; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
590590
if ((*p == ':') && (p - trypath > 1) && (p[1] == '/') && (p[2] == '/')) {
591591
wrapper = php_stream_locate_url_wrapper(trypath, &actual_path, STREAM_OPEN_FOR_INCLUDE);
592592
if (!wrapper) {

main/streams/streams.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,7 +1673,7 @@ static inline int php_stream_wrapper_scheme_validate(const char *protocol, unsig
16731673
unsigned int i;
16741674

16751675
for(i = 0; i < protocol_len; i++) {
1676-
if (!isalnum((int)protocol[i]) &&
1676+
if (!zend_isalnum_ascii((int)protocol[i]) &&
16771677
protocol[i] != '+' &&
16781678
protocol[i] != '-' &&
16791679
protocol[i] != '.') {
@@ -1753,7 +1753,7 @@ PHPAPI php_stream_wrapper *php_stream_locate_url_wrapper(const char *path, const
17531753
return (php_stream_wrapper*)((options & STREAM_LOCATE_WRAPPERS_ONLY) ? NULL : &php_plain_files_wrapper);
17541754
}
17551755

1756-
for (p = path; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) {
1756+
for (p = path; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) {
17571757
n++;
17581758
}
17591759

main/streams/transports.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ PHPAPI php_stream *_php_stream_xport_create(const char *name, size_t namelen, in
9595
}
9696
}
9797

98-
for (p = name; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) {
98+
for (p = name; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) {
9999
n++;
100100
}
101101

0 commit comments

Comments
 (0)