Skip to content

Commit f55d78e

Browse files
committed
Merge branch 'PHP-7.4' into PHP-8.0
* PHP-7.4: Fix #70962: XML_OPTION_SKIP_WHITE strips embedded whitespace
2 parents 7257e7e + a9661a5 commit f55d78e

File tree

3 files changed

+99
-53
lines changed

3 files changed

+99
-53
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ PHP NEWS
1717
- PCRE:
1818
. Fixed bug #81424 (PCRE2 10.35 JIT performance regression). (cmb)
1919

20+
- XML:
21+
. Fixed bug #70962 (XML_OPTION_SKIP_WHITE strips embedded whitespace).
22+
(Aliaksandr Bystry, cmb)
23+
2024
23 Sep 2021, PHP 8.0.11
2125

2226
- Core:

ext/xml/tests/bug70962.phpt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Bug #70962 (XML_OPTION_SKIP_WHITE strips embedded whitespace)
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('xml')) die('skip xml extension not available');
6+
?>
7+
--FILE--
8+
<?php
9+
function parseAndOutput($xml)
10+
{
11+
$parser = xml_parser_create();
12+
xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
13+
14+
xml_parse_into_struct($parser, $xml, $values);
15+
16+
return $values;
17+
}
18+
19+
$xml = "<a><b>&lt;d&gt;\n &lt;e&gt;</b><![CDATA[ ]]><c>\n \t</c></a>";
20+
21+
$parsed = parseAndOutput($xml);
22+
23+
// Check that embedded whitespace is not skipped.
24+
echo $parsed[1]['value'] . "\n";
25+
26+
// Check XML_OPTION_SKIP_WHITE ignores values of tags containing whitespace characters only.
27+
var_dump(isset($parsed[2]['value']));
28+
29+
// Check XML_OPTION_SKIP_WHITE ignores whitespace-only <![CDATA[ ]]> values.
30+
var_dump(count($parsed));
31+
32+
?>
33+
--EXPECT--
34+
<d>
35+
<e>
36+
bool(false)
37+
int(4)

ext/xml/xml.c

Lines changed: 58 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -781,72 +781,77 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
781781
zend_string *decoded_value;
782782

783783
decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
784-
for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
785-
switch (ZSTR_VAL(decoded_value)[i]) {
786-
case ' ':
787-
case '\t':
788-
case '\n':
789-
continue;
790-
default:
791-
doprint = 1;
784+
if (parser->skipwhite) {
785+
for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
786+
switch (ZSTR_VAL(decoded_value)[i]) {
787+
case ' ':
788+
case '\t':
789+
case '\n':
790+
continue;
791+
default:
792+
doprint = 1;
793+
break;
794+
}
795+
if (doprint) {
792796
break;
793-
}
794-
if (doprint) {
795-
break;
797+
}
796798
}
797799
}
798-
if (doprint || (! parser->skipwhite)) {
799-
if (parser->lastwasopen) {
800-
zval *myval;
801-
802-
/* check if the current tag already has a value - if yes append to that! */
803-
if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
804-
int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
805-
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
806-
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
807-
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
808-
zend_string_release_ex(decoded_value, 0);
809-
} else {
800+
801+
if (parser->lastwasopen) {
802+
zval *myval;
803+
804+
/* check if the current tag already has a value - if yes append to that! */
805+
if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
806+
size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
807+
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
808+
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
809+
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
810+
zend_string_release_ex(decoded_value, 0);
811+
} else {
812+
if (doprint || (! parser->skipwhite)) {
810813
add_assoc_str(parser->ctag, "value", decoded_value);
814+
} else {
815+
zend_string_release_ex(decoded_value, 0);
811816
}
817+
}
812818

813-
} else {
814-
zval tag;
815-
zval *curtag, *mytype, *myval;
816-
817-
ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
818-
if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
819-
if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
820-
if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
821-
int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
822-
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
823-
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
824-
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
825-
zend_string_release_ex(decoded_value, 0);
826-
return;
827-
}
819+
} else {
820+
zval tag;
821+
zval *curtag, *mytype, *myval;
822+
823+
ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
824+
if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
825+
if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
826+
if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
827+
size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
828+
Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
829+
strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
830+
ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
831+
zend_string_release_ex(decoded_value, 0);
832+
return;
828833
}
829834
}
830-
break;
831-
} ZEND_HASH_FOREACH_END();
835+
}
836+
break;
837+
} ZEND_HASH_FOREACH_END();
832838

833-
if (parser->level <= XML_MAXLEVEL && parser->level > 0) {
834-
array_init(&tag);
839+
if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
840+
array_init(&tag);
835841

836-
_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
842+
_xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
837843

838-
add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
839-
add_assoc_str(&tag, "value", decoded_value);
840-
add_assoc_string(&tag, "type", "cdata");
841-
add_assoc_long(&tag, "level", parser->level);
844+
add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
845+
add_assoc_str(&tag, "value", decoded_value);
846+
add_assoc_string(&tag, "type", "cdata");
847+
add_assoc_long(&tag, "level", parser->level);
842848

843-
zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
844-
} else if (parser->level == (XML_MAXLEVEL + 1)) {
845-
php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
846-
}
849+
zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
850+
} else if (parser->level == (XML_MAXLEVEL + 1)) {
851+
php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
852+
} else {
853+
zend_string_release_ex(decoded_value, 0);
847854
}
848-
} else {
849-
zend_string_release_ex(decoded_value, 0);
850855
}
851856
}
852857
}

0 commit comments

Comments
 (0)