Skip to content

Use locale-independent alternatives to isalpha/isalnum/iscntrl #7802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Zend/zend_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1890,7 +1890,7 @@ ZEND_API size_t zend_dirname(char *path, size_t len)
/* Note that on Win32 CWD is per drive (heritage from CP/M).
* This means dirname("c:foo") maps to "c:." or "c:" - which means CWD on C: drive.
*/
if ((2 <= len) && isalpha((int)((unsigned char *)path)[0]) && (':' == path[1])) {
if ((2 <= len) && zend_isalpha_ascii((int)((unsigned char *)path)[0]) && (':' == path[1])) {
/* Skip over the drive spec (if any) so as not to change */
path += 2;
len_adjust += 2;
Expand Down
63 changes: 62 additions & 1 deletion Zend/zend_operators.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,70 @@ ZEND_API const unsigned char zend_toupper_map[256] = {
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};

/* Locale-independent stand-in for ctype's isalpha(), which varies with the
 * active locale. The table is indexed by byte value and is non-zero only for
 * the bytes isalpha() accepts in the "C" locale; every entry that is not
 * listed below is implicitly zero-initialized. */
ZEND_API const bool zend_isalpha_map[256] = {
	/* 0x41-0x5a: 'A'-'Z' */
	[0x41] = 1, [0x42] = 1, [0x43] = 1, [0x44] = 1, [0x45] = 1, [0x46] = 1, [0x47] = 1,
	[0x48] = 1, [0x49] = 1, [0x4a] = 1, [0x4b] = 1, [0x4c] = 1, [0x4d] = 1, [0x4e] = 1,
	[0x4f] = 1, [0x50] = 1, [0x51] = 1, [0x52] = 1, [0x53] = 1, [0x54] = 1, [0x55] = 1,
	[0x56] = 1, [0x57] = 1, [0x58] = 1, [0x59] = 1, [0x5a] = 1,
	/* 0x61-0x7a: 'a'-'z' */
	[0x61] = 1, [0x62] = 1, [0x63] = 1, [0x64] = 1, [0x65] = 1, [0x66] = 1, [0x67] = 1,
	[0x68] = 1, [0x69] = 1, [0x6a] = 1, [0x6b] = 1, [0x6c] = 1, [0x6d] = 1, [0x6e] = 1,
	[0x6f] = 1, [0x70] = 1, [0x71] = 1, [0x72] = 1, [0x73] = 1, [0x74] = 1, [0x75] = 1,
	[0x76] = 1, [0x77] = 1, [0x78] = 1, [0x79] = 1, [0x7a] = 1,
};

/* Locale-independent stand-in for ctype's isalnum(): the union of the ASCII
 * letters and the decimal digits 0-9. The table is indexed by byte value;
 * every entry that is not listed below is implicitly zero-initialized. */
ZEND_API const bool zend_isalnum_map[256] = {
	/* 0x30-0x39: '0'-'9' */
	[0x30] = 1, [0x31] = 1, [0x32] = 1, [0x33] = 1, [0x34] = 1,
	[0x35] = 1, [0x36] = 1, [0x37] = 1, [0x38] = 1, [0x39] = 1,
	/* 0x41-0x5a: 'A'-'Z' */
	[0x41] = 1, [0x42] = 1, [0x43] = 1, [0x44] = 1, [0x45] = 1, [0x46] = 1, [0x47] = 1,
	[0x48] = 1, [0x49] = 1, [0x4a] = 1, [0x4b] = 1, [0x4c] = 1, [0x4d] = 1, [0x4e] = 1,
	[0x4f] = 1, [0x50] = 1, [0x51] = 1, [0x52] = 1, [0x53] = 1, [0x54] = 1, [0x55] = 1,
	[0x56] = 1, [0x57] = 1, [0x58] = 1, [0x59] = 1, [0x5a] = 1,
	/* 0x61-0x7a: 'a'-'z' */
	[0x61] = 1, [0x62] = 1, [0x63] = 1, [0x64] = 1, [0x65] = 1, [0x66] = 1, [0x67] = 1,
	[0x68] = 1, [0x69] = 1, [0x6a] = 1, [0x6b] = 1, [0x6c] = 1, [0x6d] = 1, [0x6e] = 1,
	[0x6f] = 1, [0x70] = 1, [0x71] = 1, [0x72] = 1, [0x73] = 1, [0x74] = 1, [0x75] = 1,
	[0x76] = 1, [0x77] = 1, [0x78] = 1, [0x79] = 1, [0x7a] = 1,
};

/* Locale-independent stand-in for ctype's iscntrl(), which varies with the
 * active locale. The table is indexed by byte value and is non-zero only for
 * the bytes iscntrl() accepts in the "C" locale; every entry that is not
 * listed below is implicitly zero-initialized. */
ZEND_API const bool zend_iscntrl_map[256] = {
	/* 0x00-0x1f */
	[0x00] = 1, [0x01] = 1, [0x02] = 1, [0x03] = 1, [0x04] = 1, [0x05] = 1, [0x06] = 1, [0x07] = 1,
	[0x08] = 1, [0x09] = 1, [0x0a] = 1, [0x0b] = 1, [0x0c] = 1, [0x0d] = 1, [0x0e] = 1, [0x0f] = 1,
	[0x10] = 1, [0x11] = 1, [0x12] = 1, [0x13] = 1, [0x14] = 1, [0x15] = 1, [0x16] = 1, [0x17] = 1,
	[0x18] = 1, [0x19] = 1, [0x1a] = 1, [0x1b] = 1, [0x1c] = 1, [0x1d] = 1, [0x1e] = 1, [0x1f] = 1,
	/* 0x7f */
	[0x7f] = 1,
};

/**
* Functions using locale lowercase:
Expand Down
6 changes: 6 additions & 0 deletions Zend/zend_operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,9 +435,15 @@ ZEND_API int ZEND_FASTCALL string_locale_compare_function(zval *op1, zval *op2);

/* 256-entry lookup tables, one entry per possible byte value. */
ZEND_API extern const unsigned char zend_tolower_map[256];
ZEND_API extern const unsigned char zend_toupper_map[256];
/* Classification tables: non-zero when the byte belongs to the class as
 * ctype would report it in the "C" locale. */
ZEND_API extern const bool zend_isalpha_map[256];
ZEND_API extern const bool zend_isalnum_map[256];
ZEND_API extern const bool zend_iscntrl_map[256];

/* Table-lookup helpers. Each one casts its argument to unsigned char before
 * indexing, so the index is always within the 256-entry table, including
 * when the argument is a plain char holding a byte >= 0x80. */
#define zend_tolower_ascii(c) (zend_tolower_map[(unsigned char)(c)])
#define zend_toupper_ascii(c) (zend_toupper_map[(unsigned char)(c)])
#define zend_isalpha_ascii(c) (zend_isalpha_map[(unsigned char)(c)])
#define zend_isalnum_ascii(c) (zend_isalnum_map[(unsigned char)(c)])
#define zend_iscntrl_ascii(c) (zend_iscntrl_map[(unsigned char)(c)])

ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length);
ZEND_API void ZEND_FASTCALL zend_str_toupper(char *str, size_t length);
Expand Down
2 changes: 1 addition & 1 deletion Zend/zend_virtual_cwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ typedef unsigned short mode_t;
#define IS_UNC_PATH(path, len) \
(len >= 2 && IS_SLASH(path[0]) && IS_SLASH(path[1]))
#define IS_ABSOLUTE_PATH(path, len) \
(len >= 2 && (/* is local */isalpha(path[0]) && path[1] == ':' || /* is UNC */IS_SLASH(path[0]) && IS_SLASH(path[1])))
(len >= 2 && (/* is local */zend_isalpha_ascii(path[0]) && path[1] == ':' || /* is UNC */IS_SLASH(path[0]) && IS_SLASH(path[1])))

#else
#ifdef HAVE_DIRENT_H
Expand Down
10 changes: 5 additions & 5 deletions ext/fileinfo/libmagic/apprentice.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ get_standard_integer_type(const char *l, const char **t)
{
int type;

if (isalpha(CAST(unsigned char, l[1]))) {
if (zend_isalpha_ascii(CAST(unsigned char, l[1]))) {
switch (l[1]) {
case 'C':
/* "dC" and "uC" */
Expand Down Expand Up @@ -1187,7 +1187,7 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs,
continue;
}
if ((*bang[i].fun)(ms, &me,
line + bang[i].len + 2,
line + bang[i].len + 2,
len - bang[i].len - 2) != 0) {
(*errs)++;
continue;
Expand Down Expand Up @@ -1419,7 +1419,7 @@ apprentice_load(struct magic_set *ms, const char *fn, int action)
/* coalesce per file arrays into a single one, if needed */
if (mset[j].count == 0)
continue;

if (coalesce_entries(ms, mset[j].me, mset[j].count,
&map->magic[j], &map->nmagic[j]) == -1) {
errs++;
Expand Down Expand Up @@ -2071,7 +2071,7 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
if (*l == 'd')
m->type = get_standard_integer_type(l, &l);
else if (*l == 's'
&& !isalpha(CAST(unsigned char, l[1]))) {
&& !zend_isalpha_ascii(CAST(unsigned char, l[1]))) {
m->type = FILE_STRING;
++l;
}
Expand Down Expand Up @@ -2287,7 +2287,7 @@ parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
private int
goodchar(unsigned char x, const char *extra)
{
return (isascii(x) && isalnum(x)) || strchr(extra, x);
return (zend_isalnum_ascii(x)) || strchr(extra, x);
}

private int
Expand Down
2 changes: 1 addition & 1 deletion ext/fileinfo/libmagic/compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);

for (p = buf; *p; p++)
if (!isalnum(*p))
if (!zend_isalnum_ascii(*p))
*p = '-';

return file_printf(ms, "application/x-decompression-error-%s-%s",
Expand Down
2 changes: 1 addition & 1 deletion ext/fileinfo/libmagic/encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ looks_ucs32(const unsigned char *bf, size_t nbytes, file_unichar_t *ubf,
| (CAST(file_unichar_t, bf[i]) << 24);
else
ubf[(*ulen)++] = CAST(file_unichar_t, bf[i + 0])
| (CAST(file_unichar_t, bf[i + 1]) << 8)
| (CAST(file_unichar_t, bf[i + 1]) << 8)
| (CAST(file_unichar_t, bf[i + 2]) << 16)
| (CAST(file_unichar_t, bf[i + 3]) << 24);

Expand Down
2 changes: 1 addition & 1 deletion ext/fileinfo/libmagic/funcs.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ file_checkfmt(char *msg, size_t mlen, const char *fmt)
return -1;
}

if (!isalpha((unsigned char)*p)) {
if (!zend_isalpha_ascii((unsigned char)*p)) {
if (msg)
snprintf(msg, mlen, "bad format char: %c", *p);
return -1;
Expand Down
8 changes: 4 additions & 4 deletions ext/filter/logical_filters.c
Original file line number Diff line number Diff line change
Expand Up @@ -528,21 +528,21 @@ static int _php_filter_validate_domain(char * domain, int len, zend_long flags)
}

/* First char must be alphanumeric */
if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
if(*s == '.' || (hostname && !zend_isalnum_ascii((int)*(unsigned char *)s))) {
return 0;
}

while (s < e) {
if (*s == '.') {
/* The first and the last character of a label must be alphanumeric */
if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
if (*(s + 1) == '.' || (hostname && (!zend_isalnum_ascii((int)*(unsigned char *)(s - 1)) || !zend_isalnum_ascii((int)*(unsigned char *)(s + 1))))) {
return 0;
}

/* Reset label length counter */
i = 1;
} else {
if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
if (i > 63 || (hostname && *s != '-' && !zend_isalnum_ascii((int)*(unsigned char *)s))) {
return 0;
}

Expand All @@ -569,7 +569,7 @@ static int is_userinfo_valid(zend_string *str)
const char *valid = "-._~!$&'()*+,;=:";
const char *p = ZSTR_VAL(str);
while (p - ZSTR_VAL(str) < ZSTR_LEN(str)) {
if (isalpha(*p) || isdigit(*p) || strchr(valid, *p)) {
if (zend_isalnum_ascii(*p) || strchr(valid, *p)) {
p++;
} else if (*p == '%' && p - ZSTR_VAL(str) <= ZSTR_LEN(str) - 3 && isdigit(*(p+1)) && isxdigit(*(p+2))) {
p += 3;
Expand Down
19 changes: 19 additions & 0 deletions ext/filter/tests/filter_validate_domain_locale.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--TEST--
FILTER_VALIDATE_DOMAIN FILTER_FLAG_HOSTNAME should not be locale dependent
--EXTENSIONS--
filter
--SKIPIF--
<?php // try to activate a single-byte german locale
if (!setlocale(LC_ALL, "de_DE")) {
print "skip Can't find german locale";
}
?>
--FILE--
<?php
// '٪' is a non-ASCII character (the bytes \xd9\xaa in this file), so it must be rejected as a hostname.
var_dump(filter_var('٪', FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME));
// The result must stay the same once a single-byte German locale is active.
setlocale(LC_ALL, "de_DE");
var_dump(filter_var('٪', FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME));
?>
--EXPECT--
bool(false)
bool(false)
4 changes: 2 additions & 2 deletions ext/gd/libgd/gd_xbm.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ void gdImageXbmCtx(gdImagePtr image, char* file_name, int fg, gdIOCtx * out)
name = estrdup("image");
} else {
for (i=0; i<l; i++) {
/* only in C-locale isalnum() would work */
if (!isupper(name[i]) && !islower(name[i]) && !isdigit(name[i])) {
/* Locale-independent check */
if (!zend_isalnum_ascii(name[i])) {
name[i] = '_';
}
}
Expand Down
2 changes: 1 addition & 1 deletion ext/imap/php_imap.c
Original file line number Diff line number Diff line change
Expand Up @@ -2306,7 +2306,7 @@ PHP_FUNCTION(imap_utf8)
#define SPECIAL(c) ((c) <= 0x1f || (c) >= 0x7f)

/* validate a modified-base64 character */
#define B64CHAR(c) (isalnum(c) || (c) == '+' || (c) == ',')
#define B64CHAR(c) (zend_isalnum_ascii(c) || (c) == '+' || (c) == ',')

/* map the low 64 bits of `n' to the modified-base64 characters */
#define B64(n) ("ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
Expand Down
2 changes: 1 addition & 1 deletion ext/mbstring/mbstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -3622,7 +3622,7 @@ PHP_FUNCTION(mb_send_mail)
to_r[to_len - 1] = '\0';
}
for (i = 0; to_r[i]; i++) {
if (iscntrl((unsigned char) to_r[i])) {
if (zend_iscntrl_ascii((unsigned char) to_r[i])) {
/* According to RFC 822, section 3.1.1 long headers may be separated into
* parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
* To prevent these separators from being replaced with a space, we skip over them. */
Expand Down
6 changes: 3 additions & 3 deletions ext/standard/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -2405,10 +2405,10 @@ php_meta_tags_token php_next_meta_token(php_meta_tags_data *md)
break;

default:
if (isalnum(ch)) {
if (zend_isalnum_ascii(ch)) {
md->token_len = 0;
buff[(md->token_len)++] = ch;
while (!php_stream_eof(md->stream) && (ch = php_stream_getc(md->stream)) && (isalnum(ch) || strchr(PHP_META_HTML401_CHARS, ch))) {
while (!php_stream_eof(md->stream) && (ch = php_stream_getc(md->stream)) && (zend_isalnum_ascii(ch) || strchr(PHP_META_HTML401_CHARS, ch))) {
buff[(md->token_len)++] = ch;

if (md->token_len == META_DEF_BUFSIZE) {
Expand All @@ -2417,7 +2417,7 @@ php_meta_tags_token php_next_meta_token(php_meta_tags_data *md)
}

/* This is ugly, but we have to replace ungetc */
if (!isalpha(ch) && ch != '-') {
if (!zend_isalnum_ascii(ch) && ch != '-') {
md->ulc = 1;
md->lc = ch;
}
Expand Down
2 changes: 1 addition & 1 deletion ext/standard/formatted_print.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ php_formatted_print(char *format, size_t format_len, zval *args, int argc, int n

PRINTF_DEBUG(("sprintf: first looking at '%c', inpos=%d\n",
*format, format - Z_STRVAL_P(z_format)));
if (isalpha((int)*format)) {
if (zend_isalpha_ascii((int)*format)) {
width = precision = 0;
argnum = ARG_NUM_NEXT;
} else {
Expand Down
2 changes: 1 addition & 1 deletion ext/standard/ftp_fopen_wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ static php_stream *php_ftp_fopen_connect(php_stream_wrapper *wrapper, const char
#define PHP_FTP_CNTRL_CHK(val, val_len, err_msg) { \
unsigned char *s = (unsigned char *) val, *e = (unsigned char *) s + val_len; \
while (s < e) { \
if (iscntrl(*s)) { \
if (zend_iscntrl_ascii(*s)) { \
php_stream_wrapper_log_error(wrapper, options, err_msg, val); \
goto connect_errexit; \
} \
Expand Down
2 changes: 1 addition & 1 deletion ext/standard/http_fopen_wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper,
ZSTR_LEN(val) = php_url_decode(ZSTR_VAL(val), ZSTR_LEN(val)); \
s = (unsigned char*)ZSTR_VAL(val); e = s + ZSTR_LEN(val); \
while (s < e) { \
if (iscntrl(*s)) { \
if (zend_iscntrl_ascii(*s)) { \
php_stream_wrapper_log_error(wrapper, options, "Invalid redirect URL! %s", new_path); \
goto out; \
} \
Expand Down
4 changes: 2 additions & 2 deletions ext/standard/mail.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ PHP_FUNCTION(mail)
to_r[to_len - 1] = '\0';
}
for (i = 0; to_r[i]; i++) {
if (iscntrl((unsigned char) to_r[i])) {
if (zend_iscntrl_ascii((unsigned char) to_r[i])) {
/* According to RFC 822, section 3.1.1 long headers may be separated into
* parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
* To prevent these separators from being replaced with a space, we use the
Expand All @@ -264,7 +264,7 @@ PHP_FUNCTION(mail)
subject_r[subject_len - 1] = '\0';
}
for (i = 0; subject_r[i]; i++) {
if (iscntrl((unsigned char) subject_r[i])) {
if (zend_iscntrl_ascii((unsigned char) subject_r[i])) {
SKIP_LONG_HEADER_SEP(subject_r, i);
subject_r[i] = ' ';
}
Expand Down
4 changes: 2 additions & 2 deletions ext/standard/metaphone.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ static const char _codes[26] =
/* a b c d e f g h i j k l m n o p q r s t u v w x y z */
};


#define ENCODE(c) (isalpha(c) ? _codes[((toupper(c)) - 'A')] : 0)
/* Use the locale-independent ASCII helpers so that the computed index is always a valid index into _codes. */
#define ENCODE(c) (zend_isalpha_ascii(c) ? _codes[((zend_toupper_ascii(c)) - 'A')] : 0)

#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */

Expand Down
2 changes: 1 addition & 1 deletion ext/standard/quot_print.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ PHPAPI zend_string *php_quot_print_encode(const unsigned char *str, size_t lengt
length--;
lp = 0;
} else {
if (iscntrl (c) || (c == 0x7f) || (c & 0x80) || (c == '=') || ((c == ' ') && (*str == '\015'))) {
if (zend_iscntrl_ascii(c) || (c == 0x7f) || (c & 0x80) || (c == '=') || ((c == ' ') && (*str == '\015'))) {
if ((((lp+= 3) > PHP_QPRINT_MAXL) && (c <= 0x7f))
|| ((c > 0x7f) && (c <= 0xdf) && ((lp + 3) > PHP_QPRINT_MAXL))
|| ((c > 0xdf) && (c <= 0xef) && ((lp + 6) > PHP_QPRINT_MAXL))
Expand Down
28 changes: 28 additions & 0 deletions ext/standard/tests/streams/locale.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
--TEST--
Stream wrappers should not be locale dependent
--SKIPIF--
<?php // try to activate a single-byte german locale
if (!setlocale(LC_ALL, "de_DE")) {
print "skip Can't find german locale";
}
?>
--INI--
allow_url_fopen=1
display_errors=stderr
--FILE--
<?php // NOTE: EXPECTF pins "line 7" and "line 10"; keep the line count of this section unchanged.
setlocale(LC_ALL, "de_DE");
class testwrapper {
}

var_dump(ctype_alpha('٪')); // \xd9 and \xaa are both alphabetical in the German locale
var_dump(stream_wrapper_register("test٪", 'testwrapper', STREAM_IS_URL)); // expected: "Invalid protocol scheme" warning, then bool(false)

echo 'stream_open: ';
fopen("test٪://test", 'r'); // expected: "Failed to open stream" warning
?>
--EXPECTF--
bool(true)
Warning: stream_wrapper_register(): Invalid protocol scheme specified. Unable to register wrapper class testwrapper to test٪:// in %s on line 7
bool(false)
stream_open: Warning: fopen(test٪://test): Failed to open stream: No such file or directory in %s on line 10
21 changes: 21 additions & 0 deletions ext/standard/tests/url/parse_url_locale.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
--TEST--
Bug #52923 (Locale settings affecting parse_url)
--SKIPIF--
<?php // try to activate a german locale
if (!setlocale(LC_ALL, "de_DE")) {
print "skip Can't find german locale";
}
?>
--FILE--
<?php
// Baseline in the default locale: the \xfc byte keeps "http\xfc" from being parsed as a scheme,
// so the whole input is returned as the path component.
echo http_build_query(parse_url("http\xfc://invalid"), true), "\n";
// Activate the German locale. With this bug fix, locale settings should no longer affect parse_url.
var_dump(setlocale(LC_CTYPE, "de_DE"));
// Same input again: the result must match the baseline above.
echo http_build_query(parse_url("http\xfc://invalid"), true), "\n";
// A multi-byte character in the path must not stop scheme, host and path from being split out.
echo http_build_query(parse_url('http://mydomain.com/path/道')), "\n";
?>
--EXPECT--
path=http%FC%3A%2F%2Finvalid
string(5) "de_DE"
path=http%FC%3A%2F%2Finvalid
scheme=http&host=mydomain.com&path=%2Fpath%2F%E9%81%93
Loading