Skip to content

[part of RFC] Implement Document::${body,head,title} #13791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ PHP NEWS
. Implemented opt-in ext/dom spec compliance RFC. (nielsdos)
. Fixed bug #79701 (getElementById does not correctly work with duplicate
definitions). (nielsdos)
. Implemented "New ext-dom features in PHP 8.4" RFC. (nielsdos)

- Fileinfo:
. Update to libmagic 5.45. (nielsdos)
Expand Down
2 changes: 2 additions & 0 deletions UPGRADING
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,8 @@ PHP 8.4 UPGRADE NOTES
equivalents to the old DOM classes in the global namespaces.
The new classes follow the DOM living spec.
RFC: https://wiki.php.net/rfc/opt_in_dom_spec_compliance
. Implemented "New ext-dom features in PHP 8.4" RFC.
RFC: https://wiki.php.net/rfc/dom_additions_84

========================================
8. Removed Extensions and SAPIs
Expand Down
2 changes: 1 addition & 1 deletion ext/dom/config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if test "$PHP_DOM" != "no"; then
$LEXBOR_DIR/selectors/selectors.c \
$LEXBOR_DIR/ns/ns.c \
$LEXBOR_DIR/tag/tag.c"
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c \
processinginstruction.c cdatasection.c \
Expand Down
2 changes: 1 addition & 1 deletion ext/dom/config.w32
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ if (PHP_DOM == "yes") {
ADD_EXTENSION_DEP('dom', 'libxml') &&
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
) {
EXTENSION("dom", "php_dom.c attr.c document.c \
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c \
Expand Down
1 change: 1 addition & 0 deletions ext/dom/dom_ce.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_attr_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_modern_attr_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_element_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_modern_element_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_html_element_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_text_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_modern_text_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_comment_class_entry;
Expand Down
5 changes: 5 additions & 0 deletions ext/dom/dom_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ zend_result dom_document_substitue_entities_write(dom_object *obj, zval *newval)

/* html5 document properties */
zend_result dom_html_document_encoding_write(dom_object *obj, zval *retval);
zend_result dom_html_document_body_read(dom_object *obj, zval *retval);
zend_result dom_html_document_body_write(dom_object *obj, zval *newval);
zend_result dom_html_document_head_read(dom_object *obj, zval *retval);
zend_result dom_html_document_title_read(dom_object *obj, zval *retval);
zend_result dom_html_document_title_write(dom_object *obj, zval *newval);

/* documenttype properties */
zend_result dom_documenttype_name_read(dom_object *obj, zval *retval);
Expand Down
282 changes: 282 additions & 0 deletions ext/dom/html_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "infra.h"
#include "html5_parser.h"
#include "html5_serializer.h"
#include "namespace_compat.h"
Expand Down Expand Up @@ -1358,4 +1359,285 @@ zend_result dom_html_document_encoding_write(dom_object *obj, zval *newval)
return SUCCESS;
}

static xmlNodePtr dom_html_document_element_read_raw(const xmlDoc *docp, bool (*accept)(const xmlChar *))
{
const xmlNode *root = xmlDocGetRootElement(docp);
if (root == NULL || !(php_dom_ns_is_fast(root, php_dom_ns_is_html_magic_token) && xmlStrEqual(root->name, BAD_CAST "html"))) {
return NULL;
}

xmlNodePtr cur = root->children;
while (cur != NULL) {
if (cur->type == XML_ELEMENT_NODE && php_dom_ns_is_fast(cur, php_dom_ns_is_html_magic_token) && accept(cur->name)) {
return cur;
}
cur = cur->next;
}

return NULL;
}

zend_result dom_html_document_element_read_helper(dom_object *obj, zval *retval, bool (*accept)(const xmlChar *))
{
DOM_PROP_NODE(const xmlDoc *, docp, obj);

const xmlNode *element = dom_html_document_element_read_raw(docp, accept);
if (element == NULL) {
ZVAL_NULL(retval);
} else {
php_dom_create_object((xmlNodePtr) element, retval, obj);
}

return SUCCESS;
}

static bool dom_accept_body_name(const xmlChar *name)
{
return xmlStrEqual(name, BAD_CAST "body") || xmlStrEqual(name, BAD_CAST "frameset");
}

static bool dom_accept_head_name(const xmlChar *name)
{
return xmlStrEqual(name, BAD_CAST "head");
}

/* https://html.spec.whatwg.org/#dom-document-body */
zend_result dom_html_document_body_read(dom_object *obj, zval *retval)
{
return dom_html_document_element_read_helper(obj, retval, dom_accept_body_name);
}

/* https://html.spec.whatwg.org/#dom-document-head */
zend_result dom_html_document_head_read(dom_object *obj, zval *retval)
{
return dom_html_document_element_read_helper(obj, retval, dom_accept_head_name);
}

/* https://html.spec.whatwg.org/#dom-document-body */
zend_result dom_html_document_body_write(dom_object *obj, zval *newval)
{
DOM_PROP_NODE(xmlDocPtr, docp, obj);

/* 1. If the new value is not a body or frameset element, then throw a "HierarchyRequestError" DOMException. */
if (Z_TYPE_P(newval) != IS_NULL) {
dom_object *newval_intern = Z_DOMOBJ_P(newval);
if (newval_intern->ptr != NULL) {
xmlNodePtr newval_node = ((php_libxml_node_ptr *) newval_intern->ptr)->node;
/* Note: because this property has type HTMLElement, we know the namespace is correct. */
if (dom_accept_body_name(newval_node->name)) {
/* 2. If the new value is the same as the body element, return. */
const xmlNode *current_body_element = dom_html_document_element_read_raw(docp, dom_accept_body_name);
if (current_body_element == newval_node) {
return SUCCESS;
}

/* 3. If the body element is not null, then replace the body element with the new value within the body element's parent and return. */
if (current_body_element != NULL) {
php_dom_adopt_node(newval_node, obj, docp);
xmlNodePtr old = xmlReplaceNode((xmlNodePtr) current_body_element, newval_node);
if (old != NULL && old->_private == NULL) {
php_libxml_node_free_resource(old);
}
return SUCCESS;
}

/* 4. If there is no document element, throw a "HierarchyRequestError" DOMException. */
xmlNodePtr root = xmlDocGetRootElement(docp);
if (root == NULL) {
php_dom_throw_error_with_message(HIERARCHY_REQUEST_ERR, "A body can only be set if there is a document element", true);
return FAILURE;
}

/* 5. Append the new value to the document element. */
php_dom_adopt_node(newval_node, obj, docp);
xmlAddChild(root, newval_node);
return SUCCESS;
}
}
}

php_dom_throw_error_with_message(HIERARCHY_REQUEST_ERR, "The new body must either be a body or a frameset tag", true);
return FAILURE;
}

/* https://dom.spec.whatwg.org/#concept-child-text-content */
static zend_string *dom_get_child_text_content(const xmlNode *node)
{
smart_str content = {0};

const xmlNode *text = node->children;
while (text != NULL) {
if (text->type == XML_TEXT_NODE || text->type == XML_CDATA_SECTION_NODE) {
smart_str_appends(&content, (const char *) text->content);
}
text = text->next;
}

return smart_str_extract(&content);
}

/* https://html.spec.whatwg.org/#the-title-element-2 */
static xmlNodePtr dom_get_title_element(const xmlDoc *doc)
{
xmlNodePtr node = doc->children;

while (node != NULL) {
if (node->type == XML_ELEMENT_NODE) {
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) && xmlStrEqual(node->name, BAD_CAST "title")) {
break;
}
}

node = php_dom_next_in_tree_order(node, NULL);
}

return node;
}

/* The subtle difference is that this is about the direct title descendant of the svg element,
* whereas the html variant of this function is about the first in-tree title element. */
static xmlNodePtr dom_get_svg_title_element(xmlNodePtr svg)
{
xmlNodePtr cur = svg->children;

while (cur != NULL) {
if (cur->type == XML_ELEMENT_NODE
&& php_dom_ns_is_fast(cur, php_dom_ns_is_svg_magic_token) && xmlStrEqual(cur->name, BAD_CAST "title")) {
break;
}
cur = cur->next;
}

return cur;
}

/* https://html.spec.whatwg.org/#document.title */
zend_result dom_html_document_title_read(dom_object *obj, zval *retval)
{
DOM_PROP_NODE(const xmlDoc *, docp, obj);
xmlNodePtr root = xmlDocGetRootElement(docp);

if (root == NULL) {
ZVAL_EMPTY_STRING(retval);
return SUCCESS;
}

zend_string *value = zend_empty_string;

/* 1. If the document element is an SVG svg element,
* then let value be the child text content of the first SVG title element that is a child of the document element. */
if (php_dom_ns_is_fast(root, php_dom_ns_is_svg_magic_token) && xmlStrEqual(root->name, BAD_CAST "svg")) {
const xmlNode *title = dom_get_svg_title_element(root);
if (title != NULL) {
value = dom_get_child_text_content(title);
}
} else {
/* 2. Otherwise, let value be the child text content of the title element,
* or the empty string if the title element is null. */
const xmlNode *title = dom_get_title_element(docp);
if (title != NULL) {
value = dom_get_child_text_content(title);
}
}

/* 3. Strip and collapse ASCII whitespace in value. */
value = dom_strip_and_collapse_ascii_whitespace(value);

/* 4. Return value. */
ZVAL_STR(retval, value);

return SUCCESS;
}

static void dom_string_replace_all(xmlDocPtr docp, xmlNodePtr element, zval *zv)
{
dom_remove_all_children(element);
xmlNode *text = xmlNewDocText(docp, BAD_CAST Z_STRVAL_P(zv));
xmlAddChild(element, text);
}

/* https://html.spec.whatwg.org/#document.title */
zend_result dom_html_document_title_write(dom_object *obj, zval *newval)
{
DOM_PROP_NODE(xmlDocPtr, docp, obj);
xmlNodePtr root = xmlDocGetRootElement(docp);

if (root == NULL) {
return SUCCESS;
}

/* If the document element is an SVG svg element */
if (php_dom_ns_is_fast(root, php_dom_ns_is_svg_magic_token) && xmlStrEqual(root->name, BAD_CAST "svg")) {
/* 1. If there is an SVG title element that is a child of the document element, let element be the first such element. */
xmlNodePtr element = dom_get_svg_title_element(root);

/* 2. Otherwise: */
if (element == NULL) {
/* 2.1. Let element be the result of creating an element given the document element's node document,
* title, and the SVG namespace. */

/* Annoyingly, we must create it in the svg namespace _without_ prefix... */
xmlNsPtr ns = root->ns;
if (ns->prefix != NULL) {
/* Slow path... */
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(obj);
zend_string *href = ZSTR_INIT_LITERAL(DOM_SVG_NS_URI, false);
ns = php_dom_libxml_ns_mapper_get_ns(ns_mapper, zend_empty_string, href);
zend_string_release_ex(href, false);
}

element = xmlNewDocNode(docp, ns, BAD_CAST "title", NULL);
if (UNEXPECTED(element == NULL)) {
php_dom_throw_error(INVALID_STATE_ERR, true);
return FAILURE;
}

/* 2.2. Insert element as the first child of the document element. */
if (root->children == NULL) {
root->last = element;
} else {
element->next = root->children;
root->children->prev = element;
}
root->children = element;
element->parent = root;
}

/* 3. String replace all with the given value within element. */
dom_string_replace_all(docp, element, newval);
}
/* If the document element is in the HTML namespace */
else if (php_dom_ns_is_fast(root, php_dom_ns_is_html_magic_token)) {
/* 1. If the title element is null and the head element is null, then return. */
xmlNodePtr title = dom_get_title_element(docp);
xmlNodePtr head = dom_html_document_element_read_raw(docp, dom_accept_head_name);
if (title == NULL && head == NULL) {
return SUCCESS;
}
Comment on lines +1611 to +1616
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a choice from the DOM spec. But okay sure.


/* 2. If the title element is non-null, let element be the title element. */
xmlNodePtr element = title;

/* 3. Otherwise: */
if (element == NULL) {
/* 3.1. Let element be the result of creating an element given the document element's node document, title,
* and the HTML namespace. */
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(obj);
element = xmlNewDocNode(docp, php_dom_libxml_ns_mapper_ensure_html_ns(ns_mapper), BAD_CAST "title", NULL);
if (UNEXPECTED(element == NULL)) {
php_dom_throw_error(INVALID_STATE_ERR, true);
return FAILURE;
}

/* 3.2. Append element to the head element. */
xmlAddChild(head, element);
}

/* 4. String replace all with the given value within element. */
dom_string_replace_all(docp, element, newval);
}

return SUCCESS;
}

#endif /* HAVE_LIBXML && HAVE_DOM */
Loading
Loading