Skip to content

[part of RFC] Implement PHP-specific extensions to Dom #14754

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ext/dom/dom_ce.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,6 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_modern_xpath_class_entry;
#endif
extern PHP_DOM_EXPORT zend_class_entry *dom_namespace_node_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_adjacent_position_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_namespace_info_class_entry;

#endif /* DOM_CE_H */
2 changes: 2 additions & 0 deletions ext/dom/dom_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ zend_result dom_element_schema_type_info_read(dom_object *obj, zval *retval);
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval);
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval);
zend_result dom_element_class_list_read(dom_object *obj, zval *retval);
zend_result dom_modern_element_substituted_node_value_read(dom_object *obj, zval *retval);
zend_result dom_modern_element_substituted_node_value_write(dom_object *obj, zval *newval);

/* entity properties */
zend_result dom_entity_public_id_read(dom_object *obj, zval *retval);
Expand Down
215 changes: 215 additions & 0 deletions ext/dom/element.c
Original file line number Diff line number Diff line change
Expand Up @@ -1842,4 +1842,219 @@ PHP_METHOD(Dom_Element, closest)
dom_element_closest(thisp, intern, return_value, selectors_str);
}

/* Read handler registered for the "substitutedNodeValue" property of modern
 * DOM elements.
 *
 * Obtains the node's content via xmlNodeGetContent() and stores it in
 * `retval` as a PHP string.
 *
 * Returns SUCCESS when a string was produced. If libxml returns no content,
 * an INVALID_STATE_ERR DOM error is raised and FAILURE is returned. */
zend_result dom_modern_element_substituted_node_value_read(dom_object *obj, zval *retval)
{
	DOM_PROP_NODE(xmlNodePtr, nodep, obj);

	xmlChar *text = xmlNodeGetContent(nodep);
	if (UNEXPECTED(text == NULL)) {
		php_dom_throw_error(INVALID_STATE_ERR, true);
		return FAILURE;
	}

	/* The libxml buffer is released right after the value has been handed to the zval. */
	ZVAL_STRING(retval, (const char *) text);
	xmlFree(text);

	return SUCCESS;
}

/* Write handler registered for the "substitutedNodeValue" property of modern
 * DOM elements.
 *
 * Replaces the node's current children with content built from the string
 * held in `newval` (the stub declares the property as `string`).
 *
 * Returns SUCCESS once the content has been set. */
zend_result dom_modern_element_substituted_node_value_write(dom_object *obj, zval *newval)
{
	DOM_PROP_NODE(xmlNodePtr, nodep, obj);

	xmlChar *replacement = (xmlChar *) Z_STRVAL_P(newval);

	/* Cached node lists of this document may be affected by the child change, so drop them first. */
	php_libxml_invalidate_node_list_cache(obj->document);

	/* Clear the existing subtree before libxml builds the new content. */
	dom_remove_all_children(nodep);
	xmlNodeSetContentLen(nodep, replacement, Z_STRLEN_P(newval));

	return SUCCESS;
}

/* Appends one Dom\NamespaceInfo object to `result` for each namespace
 * declaration that is in scope for `nodep`.
 *
 * ns_mapper: namespace mapper used to resolve (prefix, URI) pairs to xmlNs.
 * result:    array hash table that receives the NamespaceInfo objects.
 * nodep:     node whose in-scope namespaces are collected; it is also the node
 *            stored in the `element` slot of every created object.
 * intern:    DOM object passed on to php_dom_create_object().
 *
 * The declarations are gathered from the xmlns attributes of `nodep` and its
 * ancestors into a temporary prefix => xmlNs table, which is then emitted in
 * reverse insertion order. */
static void dom_element_get_in_scope_namespace_info(php_dom_libxml_ns_mapper *ns_mapper, HashTable *result, xmlNodePtr nodep, dom_object *intern)
{
	HashTable declarations;
	zend_hash_init(&declarations, 0, NULL, NULL, false);
	zend_hash_real_init_mixed(&declarations);

	/* https://www.w3.org/TR/1999/REC-xpath-19991116/#namespace-nodes */
	for (const xmlNode *ancestor = nodep; ancestor != NULL; ancestor = ancestor->parent) {
		if (ancestor->type != XML_ELEMENT_NODE) {
			continue;
		}

		/* Seek to the tail of the attribute list. */
		const xmlAttr *tail = ancestor->properties;
		while (tail != NULL && tail->next != NULL) {
			tail = tail->next;
		}

		/* Walk the attributes backwards: the ancestor traversal runs in reverse too,
		 * so this keeps the final ordering consistent. */
		for (const xmlAttr *decl = tail; decl != NULL; decl = decl->prev) {
			/* Only xmlns declarations are of interest. */
			if (decl->ns == NULL || !php_dom_ns_is_fast_ex(decl->ns, php_dom_ns_is_xmlns_magic_token)) {
				continue;
			}
			/* A declaration without a value node cannot be resolved. */
			if (decl->children == NULL || decl->children->content == NULL) {
				continue;
			}

			/* An xmlns attribute whose own namespace has no prefix is the default
			 * declaration; it is stored under the empty key. */
			const char *declared_prefix = NULL;
			if (decl->ns->prefix != NULL) {
				declared_prefix = (const char *) decl->name;
			}
			const char *table_key = declared_prefix != NULL ? declared_prefix : "";

			xmlNsPtr mapped = php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(ns_mapper, declared_prefix, (const char *) decl->children->content);

			/* NULL is a valid value for the sentinel */
			zval slot;
			ZVAL_PTR(&slot, mapped);
			zend_hash_str_add(&declarations, table_key, strlen(table_key), &slot);
		}
	}

	xmlNsPtr ns;
	zend_string *prefix;
	ZEND_HASH_MAP_REVERSE_FOREACH_STR_KEY_PTR(&declarations, prefix, ns) {
		const bool has_prefix = ZSTR_LEN(prefix) != 0;
		const bool has_uri = ns != NULL && ns->href != NULL && *ns->href != '\0';

		if (!has_prefix && !has_uri) {
			/* Exception: "the value of the xmlns attribute for the nearest such element is non-empty" */
			continue;
		}

		zval info;
		object_init_ex(&info, dom_namespace_info_class_entry);
		zend_object *info_obj = Z_OBJ(info);

		/* Slot 0: the prefix, or null for the default namespace. */
		if (has_prefix) {
			ZVAL_STR_COPY(OBJ_PROP_NUM(info_obj, 0), prefix);
		} else {
			ZVAL_NULL(OBJ_PROP_NUM(info_obj, 0));
		}

		/* Slot 1: the namespace URI, or null when it is absent or empty. */
		if (has_uri) {
			ZVAL_STRING(OBJ_PROP_NUM(info_obj, 1), (const char *) ns->href);
		} else {
			ZVAL_NULL(OBJ_PROP_NUM(info_obj, 1));
		}

		/* Slot 2: the PHP object for the node this information belongs to. */
		php_dom_create_object(nodep, OBJ_PROP_NUM(info_obj, 2), intern);

		zend_hash_next_index_insert_new(result, &info);
	} ZEND_HASH_FOREACH_END();

	zend_hash_destroy(&declarations);
}

/* Dom\Element::getInScopeNamespaces(): array
 *
 * Takes no arguments. Returns a new array filled by
 * dom_element_get_in_scope_namespace_info() for the element itself. */
PHP_METHOD(Dom_Element, getInScopeNamespaces)
{
	zval *id;
	xmlNode *nodep;
	dom_object *intern;

	ZEND_PARSE_PARAMETERS_NONE();

	DOM_GET_THIS_OBJ(nodep, id, xmlNodePtr, intern);

	php_dom_libxml_ns_mapper *mapper = php_dom_get_ns_mapper(intern);

	/* The return value doubles as the output list of the collector. */
	array_init(return_value);
	dom_element_get_in_scope_namespace_info(mapper, Z_ARRVAL_P(return_value), nodep, intern);
}

PHP_METHOD(Dom_Element, getDescendantNamespaces)
{
zval *id;
xmlNode *nodep;
dom_object *intern;

ZEND_PARSE_PARAMETERS_NONE();

DOM_GET_THIS_OBJ(nodep, id, xmlNodePtr, intern);

php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(intern);

array_init(return_value);
HashTable *result = Z_ARRVAL_P(return_value);

dom_element_get_in_scope_namespace_info(ns_mapper, result, nodep, intern);

xmlNodePtr cur = nodep->children;
while (cur != NULL) {
if (cur->type == XML_ELEMENT_NODE) {
/* TODO: this could be more optimized by updating the same HashTable repeatedly
* instead of recreating it on every node. */
dom_element_get_in_scope_namespace_info(ns_mapper, result, cur, intern);
}

cur = php_dom_next_in_tree_order(cur, nodep);
}
}

PHP_METHOD(Dom_Element, rename)
{
zend_string *namespace_uri, *qualified_name;
ZEND_PARSE_PARAMETERS_START(2, 2)
Z_PARAM_STR_OR_NULL(namespace_uri)
Z_PARAM_STR(qualified_name)
ZEND_PARSE_PARAMETERS_END();

zval *id;
dom_object *intern;
xmlNodePtr nodep;
DOM_GET_THIS_OBJ(nodep, id, xmlNodePtr, intern);

xmlChar *localname = NULL, *prefix = NULL;
int errorcode = dom_validate_and_extract(namespace_uri, qualified_name, &localname, &prefix);
if (UNEXPECTED(errorcode != 0)) {
php_dom_throw_error(errorcode, /* strict */ true);
goto cleanup;
}

if (nodep->type == XML_ATTRIBUTE_NODE) {
/* Check for duplicate attributes. */
xmlAttrPtr existing = xmlHasNsProp(nodep->parent, localname, namespace_uri && ZSTR_VAL(namespace_uri)[0] != '\0' ? BAD_CAST ZSTR_VAL(namespace_uri) : NULL);
if (existing != NULL && existing != (xmlAttrPtr) nodep) {
php_dom_throw_error_with_message(INVALID_MODIFICATION_ERR, "An attribute with the given name in the given namespace already exists", /* strict */ true);
goto cleanup;
}
} else {
ZEND_ASSERT(nodep->type == XML_ELEMENT_NODE);

/* Check for moving to or away from the HTML namespace. */
bool is_currently_html_ns = php_dom_ns_is_fast(nodep, php_dom_ns_is_html_magic_token);
bool will_be_html_ns = namespace_uri != NULL && zend_string_equals_literal(namespace_uri, DOM_XHTML_NS_URI);
if (is_currently_html_ns != will_be_html_ns) {
if (is_currently_html_ns) {
php_dom_throw_error_with_message(
INVALID_MODIFICATION_ERR,
"It is not possible to move an element out of the HTML namespace because the HTML namespace is tied to the HTMLElement class",
/* strict */ true
);
} else {
php_dom_throw_error_with_message(
INVALID_MODIFICATION_ERR,
"It is not possible to move an element into the HTML namespace because the HTML namespace is tied to the HTMLElement class",
/* strict */ true
);
}
goto cleanup;
}
}

php_libxml_invalidate_node_list_cache(intern->document);

php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(intern);

/* Update namespace uri + prefix by querying the namespace mapper */
/* prefix can be NULL here, but that is taken care of by the called APIs. */
nodep->ns = php_dom_libxml_ns_mapper_get_ns_raw_prefix_string(ns_mapper, prefix, xmlStrlen(prefix), namespace_uri);

/* Change the local name */
if (xmlDictOwns(nodep->doc->dict, nodep->name) != 1) {
xmlFree((xmlChar *) nodep->name);
}
const xmlChar *copy = xmlDictLookup(nodep->doc->dict, localname, -1);
if (copy != NULL) {
nodep->name = copy;
} else {
nodep->name = localname;
localname = NULL;
}

cleanup:
xmlFree(localname);
xmlFree(prefix);
}

#endif
4 changes: 4 additions & 0 deletions ext/dom/php_dom.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ PHP_DOM_EXPORT zend_class_entry *dom_modern_xpath_class_entry;
#endif
PHP_DOM_EXPORT zend_class_entry *dom_namespace_node_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_adjacent_position_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_namespace_info_class_entry;
/* }}} */

static zend_object_handlers dom_object_handlers;
Expand Down Expand Up @@ -840,6 +841,8 @@ PHP_MINIT_FUNCTION(dom)
DOM_REGISTER_PROP_HANDLER(&dom_namespace_node_prop_handlers, "parentElement", dom_node_parent_element_read, NULL);
zend_hash_add_new_ptr(&classes, dom_namespace_node_class_entry->name, &dom_namespace_node_prop_handlers);

dom_namespace_info_class_entry = register_class_Dom_NamespaceInfo();

dom_documentfragment_class_entry = register_class_DOMDocumentFragment(dom_node_class_entry, dom_parentnode_class_entry);
dom_documentfragment_class_entry->create_object = dom_objects_new;
dom_documentfragment_class_entry->default_object_handlers = &dom_object_handlers;
Expand Down Expand Up @@ -1066,6 +1069,7 @@ PHP_MINIT_FUNCTION(dom)
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "previousElementSibling", dom_node_previous_element_sibling_read, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "nextElementSibling", dom_node_next_element_sibling_read, NULL);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "innerHTML", dom_element_inner_html_read, dom_element_inner_html_write);
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "substitutedNodeValue", dom_modern_element_substituted_node_value_read, dom_modern_element_substituted_node_value_write);
zend_hash_merge(&dom_modern_element_prop_handlers, &dom_modern_node_prop_handlers, NULL, false);
DOM_OVERWRITE_PROP_HANDLER(&dom_modern_element_prop_handlers, "textContent", dom_node_text_content_read, dom_node_text_content_write);
zend_hash_add_new_ptr(&classes, dom_modern_element_class_entry->name, &dom_modern_element_prop_handlers);
Expand Down
27 changes: 27 additions & 0 deletions ext/dom/php_dom.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,16 @@ public function closest(string $selectors): ?Element {}
public function matches(string $selectors): bool {}

public string $innerHTML;

public string $substitutedNodeValue;

/** @return list<NamespaceInfo> */
public function getInScopeNamespaces(): array {}

/** @return list<NamespaceInfo> */
public function getDescendantNamespaces(): array {}

public function rename(?string $namespaceURI, string $qualifiedName): void {}
}

class HTMLElement extends Element
Expand All @@ -1410,6 +1420,9 @@ class Attr extends Node

/** @implementation-alias DOMAttr::isId */
public function isId(): bool {}

/** @implementation-alias Dom\Element::rename */
public function rename(?string $namespaceURI, string $qualifiedName): void {}
}

class CharacterData extends Node implements ChildNode
Expand Down Expand Up @@ -1688,6 +1701,20 @@ public function count(): int {}
public function getIterator(): \Iterator {}
}

// Read-only value object describing a single namespace declaration.
// Instances are created by the C implementation (object_init_ex on
// dom_namespace_info_class_entry); the constructor is private.
/**
* @not-serializable
* @strict-properties
*/
readonly final class NamespaceInfo
{
// Prefix of the declaration; null for the default (prefix-less) namespace.
public ?string $prefix;
// Namespace URI of the declaration; null when the URI is absent or empty.
public ?string $namespaceURI;
// The element this namespace information was collected for.
public Element $element;

/** @implementation-alias Dom\Node::__construct */
private function __construct() {}
}

#ifdef LIBXML_XPATH_ENABLED
/** @not-serializable */
final class XPath
Expand Down
Loading
Loading