diff --git a/ext/xml/tests/bug70962.phpt b/ext/xml/tests/bug70962.phpt
new file mode 100644
index 0000000000000..46a6d5dfca1b2
--- /dev/null
+++ b/ext/xml/tests/bug70962.phpt
@@ -0,0 +1,37 @@
+--TEST--
+Bug #70962: xml_parse_into_struct strips embedded whitespace with XML_OPTION_SKIP_WHITE
+--SKIPIF--
+
+--FILE--
+<d>\n <e>\n \t";
+
+$parsed = parseAndOutput($xml);
+
+// Check that embedded whitespace is not skipped.
+echo $parsed[1]['value'] . "\n";
+
+// Check XML_OPTION_SKIP_WHITE ignores values of tags containing whitespace characters only.
+var_dump(isset($parsed[2]['value']));
+
+// Check XML_OPTION_SKIP_WHITE ignores empty values.
+var_dump(count($parsed));
+
+?>
+--EXPECT--
+
+
+bool(false)
+int(4)
diff --git a/ext/xml/xml.c b/ext/xml/xml.c
index fd8aebe03a524..84e9bc2b96506 100644
--- a/ext/xml/xml.c
+++ b/ext/xml/xml.c
@@ -886,72 +886,77 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len)
zend_string *decoded_value;
decoded_value = xml_utf8_decode(s, len, parser->target_encoding);
- for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
- switch (ZSTR_VAL(decoded_value)[i]) {
- case ' ':
- case '\t':
- case '\n':
- continue;
- default:
- doprint = 1;
+ if (parser->skipwhite) {
+ for (i = 0; i < ZSTR_LEN(decoded_value); i++) {
+ switch (ZSTR_VAL(decoded_value)[i]) {
+ case ' ':
+ case '\t':
+ case '\n':
+ continue;
+ default:
+ doprint = 1;
+ break;
+ }
+ if (doprint) {
break;
- }
- if (doprint) {
- break;
+ }
}
}
- if (doprint || (! parser->skipwhite)) {
- if (parser->lastwasopen) {
- zval *myval;
-
- /* check if the current tag already has a value - if yes append to that! */
- if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
- int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
- Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
- strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
- ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
- zend_string_release_ex(decoded_value, 0);
- } else {
+
+ if (parser->lastwasopen) {
+ zval *myval;
+
+ /* check if the current tag already has a value - if yes append to that! */
+ if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) {
+ size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
+ Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
+ strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
+ ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
+ zend_string_release_ex(decoded_value, 0);
+ } else {
+ if (doprint || (! parser->skipwhite)) {
add_assoc_str(parser->ctag, "value", decoded_value);
+ } else {
+ zend_string_release_ex(decoded_value, 0);
}
+ }
- } else {
- zval tag;
- zval *curtag, *mytype, *myval;
-
- ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
- if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
- if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
- if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
- int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
- Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
- strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
- ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
- zend_string_release_ex(decoded_value, 0);
- return;
- }
+ } else {
+ zval tag;
+ zval *curtag, *mytype, *myval;
+
+ ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) {
+ if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) {
+ if (!strcmp(Z_STRVAL_P(mytype), "cdata")) {
+ if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) {
+ int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value);
+ Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0);
+ strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value),
+ ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1);
+ zend_string_release_ex(decoded_value, 0);
+ return;
}
}
- break;
- } ZEND_HASH_FOREACH_END();
+ }
+ break;
+ } ZEND_HASH_FOREACH_END();
- if (parser->level <= XML_MAXLEVEL && parser->level > 0) {
- array_init(&tag);
+ if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) {
+ array_init(&tag);
- _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
+ _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1]));
- add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
- add_assoc_str(&tag, "value", decoded_value);
- add_assoc_string(&tag, "type", "cdata");
- add_assoc_long(&tag, "level", parser->level);
+ add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1]));
+ add_assoc_str(&tag, "value", decoded_value);
+ add_assoc_string(&tag, "type", "cdata");
+ add_assoc_long(&tag, "level", parser->level);
- zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
- } else if (parser->level == (XML_MAXLEVEL + 1)) {
- php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
- }
+ zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag);
+ } else if (parser->level == (XML_MAXLEVEL + 1)) {
+ php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated");
+ } else {
+ zend_string_release_ex(decoded_value, 0);
}
- } else {
- zend_string_release_ex(decoded_value, 0);
}
}
}