Skip to content

Commit f901bec

Browse files
committed
Fix #51903: simplexml_load_file() doesn't use HTTP headers
The `encoding` attribute of the XML declaration is optional; when it is missing, it is good practice to use external encoding information where available. Thus, we check for `charset` info in `Content-Type` headers, and see whether the encoding is supported. We cater to trailing parameters and quoted-strings, but not to escaped backslashes and quotes in quoted-strings, since no known character encoding name contains these anyway. Co-authored-by: Michael Wallner <mike@php.net> Closes GH-6747.
1 parent 5787f91 commit f901bec

File tree

3 files changed

+89
-0
lines changed

3 files changed

+89
-0
lines changed

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ PHP NEWS
1111
. Fixed bug #80763 (msgfmt_format() does not accept DateTime references).
1212
(cmb)
1313

14+
- Libxml:
15+
. Fixed bug #51903 (simplexml_load_file() doesn't use HTTP headers). (cmb)
16+
1417
- MySQLnd:
1518
. Fixed bug #80713 (SegFault when disabling ATTR_EMULATE_PREPARES and
1619
MySQL 8.0). (Nikita)

ext/libxml/libxml.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,54 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
409409
return(NULL);
410410
}
411411

412+
/* Check whether an external transport protocol supplied encoding information */
413+
if (enc == XML_CHAR_ENCODING_NONE) {
414+
php_stream *s = (php_stream *) context;
415+
416+
if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
417+
zval *header;
418+
419+
ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
420+
const char buf[] = "Content-Type:";
421+
if (Z_TYPE_P(header) == IS_STRING &&
422+
!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
423+
char *needle = estrdup("charset=");
424+
char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
425+
char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1);
426+
427+
if (encoding) {
428+
char *end;
429+
430+
encoding += sizeof("charset=")-1;
431+
if (*encoding == '"') {
432+
encoding++;
433+
}
434+
end = strchr(encoding, ';');
435+
if (end == NULL) {
436+
end = encoding + strlen(encoding);
437+
}
438+
end--; /* end == encoding-1 isn't a buffer underrun */
439+
while (*end == ' ' || *end == '\t') {
440+
end--;
441+
}
442+
if (*end == '"') {
443+
end--;
444+
}
445+
if (encoding >= end) continue;
446+
*(end+1) = '\0';
447+
enc = xmlParseCharEncoding(encoding);
448+
if (enc <= XML_CHAR_ENCODING_NONE) {
449+
enc = XML_CHAR_ENCODING_NONE;
450+
}
451+
}
452+
efree(haystack);
453+
efree(needle);
454+
break; /* found content-type */
455+
}
456+
} ZEND_HASH_FOREACH_END();
457+
}
458+
}
459+
412460
/* Allocate the Input buffer front-end. */
413461
ret = xmlAllocParserInputBuffer(enc);
414462
if (ret != NULL) {

ext/libxml/tests/bug51903.phpt

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
--TEST--
2+
Bug #51903 (simplexml_load_file() doesn't use HTTP headers)
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('simplexml')) die('skip simplexml extension not available');
6+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
7+
http_server_skipif('tcp://127.0.0.1:12342');
8+
?>
9+
--FILE--
10+
<?php
11+
require "./ext/standard/tests/http/server.inc";
12+
$responses = [
13+
"data://text/plain,HTTP/1.1 200 OK\r\n"
14+
. "Content-Type: text/xml; charset=ISO-8859-1\r\n\r\n"
15+
. "<?xml version=\"1.0\"?>\n"
16+
. "<root>\xE4\xF6\xFC</root>\n",
17+
"data://text/plain,HTTP/1.1 200 OK\r\n"
18+
. "Content-Type: text/xml; charset=ISO-8859-1; foo=bar\r\n\r\n"
19+
. "<?xml version=\"1.0\"?>\n"
20+
. "<root>\xE4\xF6\xFC</root>\n",
21+
"data://text/plain,HTTP/1.1 200 OK\r\n"
22+
. "Content-Type: text/xml; charset=\"ISO-8859-1\" ; foo=bar\r\n\r\n"
23+
. "<?xml version=\"1.0\"?>\n"
24+
. "<root>\xE4\xF6\xFC</root>\n",
25+
];
26+
$pid = http_server('tcp://127.0.0.1:12342', $responses);
27+
28+
for ($i = 0; $i < count($responses); $i++) {
29+
$sxe = simplexml_load_file('http://127.0.0.1:12342/');
30+
echo "$sxe\n";
31+
}
32+
33+
http_server_kill($pid);
34+
?>
35+
--EXPECT--
36+
äöü
37+
äöü
38+
äöü

0 commit comments

Comments
 (0)