diff --git a/ext/xml/tests/bug70962.phpt b/ext/xml/tests/bug70962.phpt new file mode 100644 index 0000000000000..46a6d5dfca1b2 --- /dev/null +++ b/ext/xml/tests/bug70962.phpt @@ -0,0 +1,37 @@ +--TEST-- +Bug #70962: xml_parse_into_struct strips embedded whitespace with XML_OPTION_SKIP_WHITE +--SKIPIF-- + +--FILE-- +<d>\n <e>\n \t"; + +$parsed = parseAndOutput($xml); + +// Check embedded whitespace is not getting skipped. +echo $parsed[1]['value'] . "\n"; + +// Check XML_OPTION_SKIP_WHITE ignores values of tags containing whitespace characters only. +var_dump(isset($parsed[2]['value'])); + +// Check XML_OPTION_SKIP_WHITE ignores empty values. +var_dump(count($parsed)); + +?> +--EXPECT-- + + +bool(false) +int(4) diff --git a/ext/xml/xml.c b/ext/xml/xml.c index fd8aebe03a524..84e9bc2b96506 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -886,72 +886,77 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) zend_string *decoded_value; decoded_value = xml_utf8_decode(s, len, parser->target_encoding); - for (i = 0; i < ZSTR_LEN(decoded_value); i++) { - switch (ZSTR_VAL(decoded_value)[i]) { - case ' ': - case '\t': - case '\n': - continue; - default: - doprint = 1; + if (parser->skipwhite) { + for (i = 0; i < ZSTR_LEN(decoded_value); i++) { + switch (ZSTR_VAL(decoded_value)[i]) { + case ' ': + case '\t': + case '\n': + continue; + default: + doprint = 1; + break; + } + if (doprint) { break; - } - if (doprint) { - break; + } } } - if (doprint || (! parser->skipwhite)) { - if (parser->lastwasopen) { - zval *myval; - - /* check if the current tag already has a value - if yes append to that! */ - if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { - int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); - Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); - strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), - ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); - zend_string_release_ex(decoded_value, 0); - } else { + + if (parser->lastwasopen) { + zval *myval; + + /* check if the current tag already has a value - if yes append to that! */ + if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { + size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); + Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); + strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), + ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); + zend_string_release_ex(decoded_value, 0); + } else { + if (doprint || (! parser->skipwhite)) { add_assoc_str(parser->ctag, "value", decoded_value); + } else { + zend_string_release_ex(decoded_value, 0); } + } - } else { - zval tag; - zval *curtag, *mytype, *myval; - - ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { - if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { - if (!strcmp(Z_STRVAL_P(mytype), "cdata")) { - if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { - int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); - Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); - strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), - ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); - zend_string_release_ex(decoded_value, 0); - return; - } + } else { + zval tag; + zval *curtag, *mytype, *myval; + + ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { + if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { + if (!strcmp(Z_STRVAL_P(mytype), "cdata")) { + if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { + int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); + Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); + strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), + ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); + zend_string_release_ex(decoded_value, 0); + return; } } - break; - } ZEND_HASH_FOREACH_END(); + } + break; + } ZEND_HASH_FOREACH_END(); - if (parser->level <= XML_MAXLEVEL && parser->level > 0) { - array_init(&tag); + if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) { + array_init(&tag); - _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); + _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); - add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1])); - add_assoc_str(&tag, "value", decoded_value); - add_assoc_string(&tag, "type", "cdata"); - add_assoc_long(&tag, "level", parser->level); + add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1])); + add_assoc_str(&tag, "value", decoded_value); + add_assoc_string(&tag, "type", "cdata"); + add_assoc_long(&tag, "level", parser->level); - zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); - } else if (parser->level == (XML_MAXLEVEL + 1)) { - php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); - } + zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); + } else if (parser->level == (XML_MAXLEVEL + 1)) { + php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); + } else { + zend_string_release_ex(decoded_value, 0); } - } else { - zend_string_release_ex(decoded_value, 0); } } }