Skip to content

Add $outerHTML property to DOM #15887

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ext/dom/config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ if test "$PHP_DOM" != "no"; then
html5_parser.c
html5_serializer.c
infra.c
inner_html_mixin.c
inner_outer_html_mixin.c
namednodemap.c
namespace_compat.c
node.c
Expand Down
2 changes: 1 addition & 1 deletion ext/dom/config.w32
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ if (PHP_DOM == "yes") {
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c private_data.c \
domexception.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c inner_html_mixin.c \
cdatasection.c documentfragment.c domimplementation.c element.c inner_outer_html_mixin.c \
node.c characterdata.c documenttype.c \
entity.c nodelist.c html_collection.c text.c comment.c \
entityreference.c \
Expand Down
2 changes: 2 additions & 0 deletions ext/dom/dom_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ zend_result dom_element_id_write(dom_object *obj, zval *newval);
zend_result dom_element_schema_type_info_read(dom_object *obj, zval *retval);
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval);
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval);
zend_result dom_element_outer_html_read(dom_object *obj, zval *retval);
zend_result dom_element_outer_html_write(dom_object *obj, zval *newval);
zend_result dom_element_class_list_read(dom_object *obj, zval *retval);
zend_result dom_modern_element_substituted_node_value_read(dom_object *obj, zval *retval);
zend_result dom_modern_element_substituted_node_value_write(dom_object *obj, zval *newval);
Expand Down
133 changes: 117 additions & 16 deletions ext/dom/inner_html_mixin.c → ext/dom/inner_outer_html_mixin.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,9 @@ static int dom_write_smart_str(void *context, const char *buffer, int len)
return len;
}

/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
* and https://w3c.github.io/DOM-Parsing/#dfn-fragment-serializing-algorithm */
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
/* https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#fragment-serializing-algorithm-steps */
static zend_string *dom_element_html_fragment_serialize(dom_object *obj, xmlNodePtr node)
{
DOM_PROP_NODE(xmlNodePtr, node, obj);

/* 1. Let context document be the value of node's node document. */
const xmlDoc *context_document = node->doc;

Expand All @@ -73,7 +70,7 @@ zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
ctx.write_string = dom_inner_html_write_string;
ctx.write_string_len = dom_inner_html_write_string_len;
dom_html5_serialize(&ctx, node);
ZVAL_STR(retval, smart_str_extract(&output));
return smart_str_extract(&output);
}
/* 3. Otherwise, context document is an XML document; return an XML serialization of node passing the flag require well-formed. */
else {
Expand Down Expand Up @@ -104,11 +101,21 @@ zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
if (UNEXPECTED(status < 0)) {
smart_str_free_ex(&str, false);
php_dom_throw_error_with_message(SYNTAX_ERR, "The resulting XML serialization is not well-formed", true);
return FAILURE;
return NULL;
}
ZVAL_STR(retval, smart_str_extract(&str));
return smart_str_extract(&str);
}
}

/* innerHTML getter.
 * https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
 *
 * Serializes the node behind obj via dom_element_html_fragment_serialize()
 * and stores the resulting string in retval.
 *
 * Returns SUCCESS when a serialization was produced, FAILURE when the
 * serializer returned NULL. */
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
{
	DOM_PROP_NODE(xmlNodePtr, element, obj);

	zend_string *markup = dom_element_html_fragment_serialize(obj, element);
	if (markup != NULL) {
		ZVAL_STR(retval, markup);
		return SUCCESS;
	}

	return FAILURE;
}

Expand Down Expand Up @@ -334,23 +341,31 @@ static xmlNodePtr dom_xml_fragment_parsing_algorithm(dom_object *obj, const xmlN
return NULL;
}

/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin
* and https://w3c.github.io/DOM-Parsing/#dfn-fragment-parsing-algorithm */
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)
/* https://w3c.github.io/DOM-Parsing/#dfn-fragment-parsing-algorithm */
static xmlNodePtr dom_parse_fragment(dom_object *obj, xmlNodePtr context_node, const zend_string *input)
{
DOM_PROP_NODE(xmlNodePtr, context_node, obj);

xmlNodePtr fragment;
if (context_node->doc->type == XML_DOCUMENT_NODE) {
fragment = dom_xml_fragment_parsing_algorithm(obj, context_node, Z_STR_P(newval));
return dom_xml_fragment_parsing_algorithm(obj, context_node, input);
} else {
fragment = dom_html_fragment_parsing_algorithm(obj, context_node, Z_STR_P(newval), obj->document->quirks_mode);
return dom_html_fragment_parsing_algorithm(obj, context_node, input, obj->document->quirks_mode);
}
}

/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin */
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)
{
/* 1. We don't do injection sinks, skip. */

/* 2. Let context be this. */
DOM_PROP_NODE(xmlNodePtr, context_node, obj);

/* 3. Let fragment be the result of invoking the fragment parsing algorithm steps with context and compliantString. */
xmlNodePtr fragment = dom_parse_fragment(obj, context_node, Z_STR_P(newval));
if (fragment == NULL) {
return FAILURE;
}

/* 4. If context is a template element, then set context to the template element's template contents (a DocumentFragment). */
if (php_dom_ns_is_fast(context_node, php_dom_ns_is_html_magic_token) && xmlStrEqual(context_node->name, BAD_CAST "template")) {
context_node = php_dom_ensure_templated_content(php_dom_get_private_data(obj), context_node);
if (context_node == NULL) {
Expand All @@ -359,8 +374,94 @@ zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)
}
}

/* 5. Replace all with fragment within context. */
dom_remove_all_children(context_node);
return php_dom_pre_insert(obj->document, fragment, context_node, NULL) ? SUCCESS : FAILURE;
}

/* outerHTML getter.
 * https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#the-outerhtml-property
 *
 * Serializes the element itself (not just its children) by temporarily
 * hanging it under a stack-allocated, fictional fragment node and running the
 * fragment serializer on that wrapper. The tree is put back exactly as it was
 * before returning.
 *
 * Returns SUCCESS with the serialization stored in retval, or FAILURE when
 * the serializer returned NULL. */
zend_result dom_element_outer_html_read(dom_object *obj, zval *retval)
{
	DOM_PROP_NODE(xmlNodePtr, this, obj);

	/* 1. Let element be a fictional node whose only child is this. */
	xmlNode element;
	memset(&element, 0, sizeof(element));
	element.type = XML_DOCUMENT_FRAG_NODE;
	element.children = element.last = this;
	element.doc = this->doc;

	/* To really be the *only* child of the fictional node, this must not be
	 * reachable from, nor lead to, its real siblings. Patching only the parent
	 * pointer would leave this->next pointing at nodes whose parent is not the
	 * fictional node, so a walk over the children through the sibling links
	 * (the libxml2 tree convention; the serializer is assumed to follow it)
	 * would escape the wrapper. Detach all three links for the duration of
	 * the serialization. */
	xmlNodePtr old_parent = this->parent;
	xmlNodePtr old_prev = this->prev;
	xmlNodePtr old_next = this->next;
	this->parent = &element;
	this->prev = NULL;
	this->next = NULL;

	/* 2. Return the result of running fragment serializing algorithm steps with element and true. */
	zend_string *serialization = dom_element_html_fragment_serialize(obj, &element);

	/* Restore the real links before any return path; element goes out of scope. */
	this->next = old_next;
	this->prev = old_prev;
	this->parent = old_parent;

	if (serialization == NULL) {
		return FAILURE;
	}
	ZVAL_STR(retval, serialization);
	return SUCCESS;
}

/* outerHTML setter.
 * https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#the-outerhtml-property
 *
 * Parses newval as a fragment in the context of this element's parent and
 * replaces this element with the parsed nodes. A parentless element is left
 * untouched and reported as SUCCESS.
 *
 * Returns SUCCESS or FAILURE; FAILURE is returned when the parent is a
 * document, when the temporary context element cannot be created, when
 * parsing yields no fragment, or when the insertion is rejected. */
zend_result dom_element_outer_html_write(dom_object *obj, zval *newval)
{
	/* 1. We don't do injection sinks, skip. */

	/* 2. Let parent be this's parent. */
	DOM_PROP_NODE(xmlNodePtr, self, obj);
	xmlNodePtr parse_context = self->parent;

	/* 3. If parent is null, return. */
	if (parse_context == NULL) {
		return SUCCESS;
	}

	/* 4. If parent is a Document, throw.
	 * NOTE(review): the HTML spec names "NoModificationAllowedError" for this
	 * step, whereas INVALID_MODIFICATION_ERR is thrown here - confirm which
	 * one is intended. */
	const bool parent_is_document =
		parse_context->type == XML_DOCUMENT_NODE || parse_context->type == XML_HTML_DOCUMENT_NODE;
	if (parent_is_document) {
		php_dom_throw_error(INVALID_MODIFICATION_ERR, true);
		return FAILURE;
	}

	/* 5. If parent is a DocumentFragment, set parent to the result of creating an element given
	 *    this's node document, body, and the HTML namespace.
	 * That element only serves as parsing context, so it is owned (and freed) by this function. */
	bool owns_context = false;
	if (parse_context->type == XML_DOCUMENT_FRAG_NODE) {
		xmlNsPtr html_ns = php_dom_libxml_ns_mapper_ensure_html_ns(php_dom_get_ns_mapper(obj));

		parse_context = xmlNewDocNode(parse_context->doc, html_ns, BAD_CAST "body", NULL);
		if (UNEXPECTED(parse_context == NULL)) {
			php_dom_throw_error(INVALID_STATE_ERR, true);
			return FAILURE;
		}
		owns_context = true;
	}

	zend_result result = FAILURE;

	/* 6. Let fragment be the result of invoking the fragment parsing algorithm steps given parent and compliantString. */
	xmlNodePtr fragment = dom_parse_fragment(obj, parse_context, Z_STR_P(newval));
	if (fragment != NULL) {
		/* 7. Replace this with fragment within this's parent.
		 * The real parent is used here, not the temporary parsing context. */
		if (php_dom_pre_insert(obj->document, fragment, self->parent, self)) {
			/* The new nodes now sit in front of this; dropping this completes the replacement. */
			xmlUnlinkNode(self);
			result = SUCCESS;
		} else {
			/* NOTE(review): dom_element_inner_html_write() does not free the fragment when
			 * php_dom_pre_insert() fails, while this path does - confirm who owns the
			 * fragment after a failed pre-insert. */
			xmlFreeNode(fragment);
		}
	}

	if (owns_context) {
		/* On success nothing may have been attached to the temporary context element. */
		ZEND_ASSERT(result == FAILURE || parse_context->children == NULL);
		xmlFreeNode(parse_context);
	}
	return result;
}

#endif
1 change: 1 addition & 0 deletions ext/dom/php_dom.c
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,7 @@ PHP_MINIT_FUNCTION(dom)
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "previousElementSibling", dom_node_previous_element_sibling_read, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "nextElementSibling", dom_node_next_element_sibling_read, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "innerHTML", dom_element_inner_html_read, dom_element_inner_html_write);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "outerHTML", dom_element_outer_html_read, dom_element_outer_html_write);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "substitutedNodeValue", dom_modern_element_substituted_node_value_read, dom_modern_element_substituted_node_value_write);
zend_hash_merge(&dom_modern_element_prop_handlers, &dom_modern_node_prop_handlers, NULL, false);
DOM_OVERWRITE_PROP_HANDLER(&dom_modern_element_prop_handlers, "textContent", dom_node_text_content_read, dom_node_text_content_write);
Expand Down
3 changes: 3 additions & 0 deletions ext/dom/php_dom.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -1688,6 +1688,9 @@ public function matches(string $selectors): bool {}
/** @virtual */
public string $innerHTML;

/** @virtual */
public string $outerHTML;

/** @virtual */
public string $substitutedNodeValue;

Expand Down
8 changes: 7 additions & 1 deletion ext/dom/php_dom_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions ext/dom/tests/gh15192.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ $element = $dom2->firstChild;
$dom = new DomDocument();
var_dump($element);
?>
--EXPECTF--
object(Dom\HTMLElement)#3 (29) {
--EXPECT--
object(Dom\HTMLElement)#3 (30) {
["namespaceURI"]=>
string(28) "http://www.w3.org/1999/xhtml"
["prefix"]=>
Expand Down Expand Up @@ -40,6 +40,8 @@ object(Dom\HTMLElement)#3 (29) {
NULL
["innerHTML"]=>
string(36) "<head></head><body><p>foo</p></body>"
["outerHTML"]=>
string(49) "<html><head></head><body><p>foo</p></body></html>"
["substitutedNodeValue"]=>
string(3) "foo"
["nodeType"]=>
Expand Down
34 changes: 34 additions & 0 deletions ext/dom/tests/modern/html/parser/Element_outerHTML.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
--TEST--
Test writing Element::$outerHTML on HTML documents
--EXTENSIONS--
dom
--FILE--
<?php

// Exercises the $outerHTML setter on an HTML document; after every write the
// document is dumped both as XML and as HTML.

$dom = Dom\HTMLDocument::createFromString('<p>foo</p>', LIBXML_NOERROR);
$p = $dom->body->firstChild;
// Replace the <p> with several nodes at once; the trailing <p> shows up closed in the expected output.
$p->outerHTML = '<div></div>&nbsp;<p>'; // intentionally unclosed
echo $dom->saveXML(), "\n";
echo $dom->saveHtml(), "\n";
$div = $dom->body->firstChild;
// Replace the <div> with plain text containing an invalid UTF-8 byte (\xff);
// the expected output shows a replacement character in its place.
$div->outerHTML = "invalid\xffutf-8𐍈𐍈𐍈";
echo $dom->saveXML(), "\n";
echo $dom->saveHtml(), "\n";

// Replace <body> itself. Per the expected output the template ends up inside
// a second <head>, followed by a new, empty <body>.
$dom->body->outerHTML = '<template><p>foo</p></template>';
var_dump($dom->body->querySelector('p')); // Should be NULL because the template contents do not participate in the DOM tree
echo $dom->saveXML(), "\n";
echo $dom->saveHtml(), "\n";

?>
--EXPECT--
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><div></div> <p></p></body></html>
<html><head></head><body><div></div>&nbsp;<p></p></body></html>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body>invalid�utf-8𐍈𐍈𐍈 <p></p></body></html>
<html><head></head><body>invalid�utf-8𐍈𐍈𐍈&nbsp;<p></p></body></html>
NULL
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<html xmlns="http://www.w3.org/1999/xhtml"><head></head><head><template><p>foo</p></template></head><body></body></html>
<html><head></head><head><template><p>foo</p></template></head><body></body></html>
28 changes: 28 additions & 0 deletions ext/dom/tests/modern/html/serializer/Element_outerHTML.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
--TEST--
Test reading Element::$outerHTML on HTML documents
--EXTENSIONS--
dom
--FILE--
<?php

// Exercises the $outerHTML getter on an HTML document for four kinds of elements.

$dom = Dom\HTMLDocument::createFromString('<p>foo</p>', LIBXML_NOERROR);

// An ordinary element nested in the tree.
$p = $dom->body->firstChild;
var_dump($p->outerHTML);

// The document element, i.e. an element whose parent is the document.
$root = $dom->documentElement;
var_dump($root->outerHTML);

// An element that was created but never inserted anywhere.
$unattached_element = $dom->createElement('unattached');
var_dump($unattached_element->outerHTML);

// A template element filled through innerHTML; the expected output includes its contents.
$template = $dom->createElement('template');
$template->innerHTML = '<p>foo</p>';
var_dump($template->outerHTML);

?>
--EXPECT--
string(10) "<p>foo</p>"
string(49) "<html><head></head><body><p>foo</p></body></html>"
string(25) "<unattached></unattached>"
string(31) "<template><p>foo</p></template>"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
--TEST--
Test writing Element::$outerHTML on HTML documents - invalid tree variation
--EXTENSIONS--
dom
--CREDITS--
Dennis Snell
--FILE--
<?php

// The <p> lives inside an <a>; replacing it with another <a> through the
// $outerHTML setter produces nested anchors, which the expected output keeps as-is.
$dom = Dom\HTMLDocument::createFromString('<a href="#one"><p>Link</p></a>', LIBXML_NOERROR);
$p = $dom->body->querySelector('p');
$p->outerHTML = '<a href="#two">Another Link</a>';
echo $dom->saveHTML();

?>
--EXPECT--
<html><head></head><body><a href="#one"><a href="#two">Another Link</a></a></body></html>
Loading