Skip to content

Commit a9661a5

Browse files
Aliaksandr Bystrycmb69
Aliaksandr Bystry
authored andcommitted
Fix #70962: XML_OPTION_SKIP_WHITE strips embedded whitespace
We must never strip embedded whitespace; we only need to skip values when that option is set, and make sure that we keep BC regarding the different behavior for "cdata" and "complete" elements (for the former, the whole element is skipped; for the latter only the "value" key). We also fix erroneous `int` types which should actually be `size_t`. Co-authored-by: Christoph M. Becker <cmbecker69@gmx.de> Closes GH-7493.
1 parent 0badc7d commit a9661a5

File tree

3 files changed

+99
-53
lines changed

3 files changed

+99
-53
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ PHP NEWS
1313
- PCRE:
1414
. Fixed bug #81424 (PCRE2 10.35 JIT performance regression). (cmb)
1515

16+
- XML:
17+
. Fixed bug #70962 (XML_OPTION_SKIP_WHITE strips embedded whitespace).
18+
(Aliaksandr Bystry, cmb)
19+
1620
23 Dep 2021, PHP 7.4.24
1721

1822
- Core:

ext/xml/tests/bug70962.phpt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Bug #70962 (XML_OPTION_SKIP_WHITE strips embedded whitespace)
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('xml')) die('skip xml extension not available');
6+
?>
7+
--FILE--
8+
<?php
9+
function parseAndOutput($xml)
10+
{
11+
$parser = xml_parser_create();
12+
xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
13+
14+
xml_parse_into_struct($parser, $xml, $values);
15+
16+
return $values;
17+
}
18+
19+
$xml = "<a><b>&lt;d&gt;\n &lt;e&gt;</b><![CDATA[ ]]><c>\n \t</c></a>";
20+
21+
$parsed = parseAndOutput($xml);
22+
23+
// Check embedded whitespace is not getting skipped.
24+
echo $parsed[1]['value'] . "\n";
25+
26+
// Check XML_OPTION_SKIP_WHITE ignores values of tags containing whitespace characters only.
27+
var_dump(isset($parsed[2]['value']));
28+
29+
// Check XML_OPTION_SKIP_WHITE ignores empty <![CDATA[ ]]> values.
30+
var_dump(count($parsed));
31+
32+
?>
33+
--EXPECT--
34+
<d>
35+
<e>
36+
bool(false)
37+
int(4)

ext/xml/xml.c

Lines changed: 58 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -886,72 +886,77 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
886886
zend_string *decoded_value;
887887

888888
decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
889-
for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
890-
switch (ZSTR_VAL(decoded_value)[i]) {
891-
case ' ':
892-
case '\t':
893-
case '\n':
894-
continue;
895-
default:
896-
doprint = 1;
889+
if (parser->skipwhite) {
890+
for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
891+
switch (ZSTR_VAL(decoded_value)[i]) {
892+
case ' ':
893+
case '\t':
894+
case '\n':
895+
continue;
896+
default:
897+
doprint = 1;
898+
break;
899+
}
900+
if (doprint) {
897901
break;
898-
}
899-
if (doprint) {
900-
break;
902+
}
901903
}
902904
}
903-
if (doprint || (! parser->skipwhite)) {
904-
if (parser->lastwasopen) {
905-
zval *myval;
906-
907-
/* check if the current tag already has a value - if yes append to that! */
908-
if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
909-
int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
910-
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
911-
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
912-
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
913-
zend_string_release_ex(decoded_value, 0);
914-
} else {
905+
906+
if (parser->lastwasopen) {
907+
zval *myval;
908+
909+
/* check if the current tag already has a value - if yes append to that! */
910+
if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
911+
size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
912+
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
913+
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
914+
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
915+
zend_string_release_ex(decoded_value, 0);
916+
} else {
917+
if (doprint || (! parser->skipwhite)) {
915918
add_assoc_str(parser->ctag, "value", decoded_value);
919+
} else {
920+
zend_string_release_ex(decoded_value, 0);
916921
}
922+
}
917923

918-
} else {
919-
zval tag;
920-
zval *curtag, *mytype, *myval;
921-
922-
ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
923-
if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
924-
if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
925-
if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
926-
int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
927-
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
928-
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
929-
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
930-
zend_string_release_ex(decoded_value, 0);
931-
return;
932-
}
924+
} else {
925+
zval tag;
926+
zval *curtag, *mytype, *myval;
927+
928+
ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
929+
if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
930+
if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
931+
if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
932+
size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
933+
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
934+
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
935+
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
936+
zend_string_release_ex(decoded_value, 0);
937+
return;
933938
}
934939
}
935-
break;
936-
} ZEND_HASH_FOREACH_END();
940+
}
941+
break;
942+
} ZEND_HASH_FOREACH_END();
937943

938-
if (parser->level <= XML_MAXLEVEL && parser->level > 0) {
939-
array_init(&tag);
944+
if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
945+
array_init(&tag);
940946

941-
_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
947+
_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
942948

943-
add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
944-
add_assoc_str(&tag, "value", decoded_value);
945-
add_assoc_string(&tag, "type", "cdata");
946-
add_assoc_long(&tag, "level", parser->level);
949+
add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
950+
add_assoc_str(&tag, "value", decoded_value);
951+
add_assoc_string(&tag, "type", "cdata");
952+
add_assoc_long(&tag, "level", parser->level);
947953

948-
zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
949-
} else if (parser->level == (XML_MAXLEVEL + 1)) {
950-
php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
951-
}
954+
zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
955+
} else if (parser->level == (XML_MAXLEVEL + 1)) {
956+
php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
957+
} else {
958+
zend_string_release_ex(decoded_value, 0);
952959
}
953-
} else {
954-
zend_string_release_ex(decoded_value, 0);
955960
}
956961
}
957962
}

0 commit comments

Comments
 (0)