Skip to content

Commit cf914f4

Browse files
authored
Implement PHP-specific extensions to Dom (#14754)
See RFC: https://wiki.php.net/rfc/dom_additions_84
1 parent 5b673e9 commit cf914f4

14 files changed

+807
-1
lines changed

ext/dom/dom_ce.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,6 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_modern_xpath_class_entry;
6969
#endif
7070
extern PHP_DOM_EXPORT zend_class_entry *dom_namespace_node_class_entry;
7171
extern PHP_DOM_EXPORT zend_class_entry *dom_adjacent_position_class_entry;
72+
extern PHP_DOM_EXPORT zend_class_entry *dom_namespace_info_class_entry;
7273

7374
#endif /* DOM_CE_H */

ext/dom/dom_properties.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ zend_result dom_element_schema_type_info_read(dom_object *obj, zval *retval);
8686
zend_result dom_element_inner_html_read(dom_object *obj, zval *retval);
8787
zend_result dom_element_inner_html_write(dom_object *obj, zval *newval);
8888
zend_result dom_element_class_list_read(dom_object *obj, zval *retval);
89+
zend_result dom_modern_element_substituted_node_value_read(dom_object *obj, zval *retval);
90+
zend_result dom_modern_element_substituted_node_value_write(dom_object *obj, zval *newval);
8991

9092
/* entity properties */
9193
zend_result dom_entity_public_id_read(dom_object *obj, zval *retval);

ext/dom/element.c

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1842,4 +1842,219 @@ PHP_METHOD(Dom_Element, closest)
18421842
dom_element_closest(thisp, intern, return_value, selectors_str);
18431843
}
18441844

1845+
/*
 * Read handler for the "substitutedNodeValue" property of modern DOM elements.
 *
 * Stores the text returned by xmlNodeGetContent() for the node into `retval`.
 *
 * Returns SUCCESS when `retval` was populated. When libxml2 returns no
 * content, an INVALID_STATE_ERR DOM error is raised and FAILURE is returned.
 */
zend_result dom_modern_element_substituted_node_value_read(dom_object *obj, zval *retval)
{
	DOM_PROP_NODE(xmlNodePtr, nodep, obj);

	xmlChar *text = xmlNodeGetContent(nodep);
	if (UNEXPECTED(text == NULL)) {
		php_dom_throw_error(INVALID_STATE_ERR, true);
		return FAILURE;
	}

	/* The buffer belongs to libxml2 and is released as soon as its bytes
	 * have been handed to the zval. */
	ZVAL_STRING(retval, (const char *) text);
	xmlFree(text);
	return SUCCESS;
}
1861+
1862+
/*
 * Write handler for the "substitutedNodeValue" property of modern DOM elements.
 *
 * Drops every existing child of the node and then installs the string held in
 * `newval` through xmlNodeSetContentLen(). Cached node lists of the owning
 * document are invalidated first because the tree is about to change.
 *
 * Always returns SUCCESS.
 */
zend_result dom_modern_element_substituted_node_value_write(dom_object *obj, zval *newval)
{
	DOM_PROP_NODE(xmlNodePtr, nodep, obj);

	const xmlChar *new_text = (const xmlChar *) Z_STRVAL_P(newval);
	const size_t new_text_len = Z_STRLEN_P(newval);

	php_libxml_invalidate_node_list_cache(obj->document);
	dom_remove_all_children(nodep);
	xmlNodeSetContentLen(nodep, new_text, new_text_len);

	return SUCCESS;
}
1872+
1873+
/*
 * Appends one namespace-info object (class dom_namespace_info_class_entry) to
 * `result` for every namespace binding that is in scope on `nodep`.
 *
 * ns_mapper: mapper used to turn a (prefix, URI) pair into an xmlNs entry.
 * result:    list that receives the newly created objects.
 * nodep:     node whose ancestor-or-self chain is scanned for xmlns
 *            attributes; it is also the node stored in property slot 2 of
 *            every created object.
 * intern:    DOM object forwarded to php_dom_create_object().
 *
 * Property slots written on each object: 0 = prefix (NULL for the default
 * namespace), 1 = namespace URI (NULL when absent or empty), 2 = element.
 */
static void dom_element_get_in_scope_namespace_info(php_dom_libxml_ns_mapper *ns_mapper, HashTable *result, xmlNodePtr nodep, dom_object *intern)
{
	/* prefix ("" stands for the default namespace) => xmlNs of the nearest declaration */
	HashTable nearest_decls;
	zend_hash_init(&nearest_decls, 0, NULL, NULL, false);
	zend_hash_real_init_mixed(&nearest_decls);

	/* https://www.w3.org/TR/1999/REC-xpath-19991116/#namespace-nodes */
	for (const xmlNode *ancestor = nodep; ancestor != NULL; ancestor = ancestor->parent) {
		if (ancestor->type != XML_ELEMENT_NODE) {
			continue;
		}

		/* Seek to the tail of the attribute list (stays NULL when there are no attributes). */
		const xmlAttr *attr = ancestor->properties;
		while (attr != NULL && attr->next != NULL) {
			attr = attr->next;
		}

		/* Attributes are visited last-to-first: the ancestor walk is reversed too,
		 * and the table is read back in reverse below, which keeps the output
		 * ordering consistent. */
		for (; attr != NULL; attr = attr->prev) {
			bool is_xmlns_decl = attr->ns != NULL && php_dom_ns_is_fast_ex(attr->ns, php_dom_ns_is_xmlns_magic_token);
			if (!is_xmlns_decl || attr->children == NULL || attr->children->content == NULL) {
				continue;
			}

			/* An xmlns attribute without a prefix of its own declares the default
			 * namespace; otherwise the attribute's local name is the declared prefix. */
			const char *decl_prefix = NULL;
			if (attr->ns->prefix != NULL) {
				decl_prefix = (const char *) attr->name;
			}
			const char *table_key = decl_prefix != NULL ? decl_prefix : "";

			xmlNsPtr mapped = php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(ns_mapper, decl_prefix, (const char *) attr->children->content);

			/* A NULL pointer is stored as-is; it acts as a valid sentinel value.
			 * zend_hash_str_add() does not replace an existing key, so the
			 * declaration closest to nodep wins. */
			zval entry;
			ZVAL_PTR(&entry, mapped);
			zend_hash_str_add(&nearest_decls, table_key, strlen(table_key), &entry);
		}
	}

	zend_string *prefix_key;
	xmlNsPtr bound_ns;
	ZEND_HASH_MAP_REVERSE_FOREACH_STR_KEY_PTR(&nearest_decls, prefix_key, bound_ns) {
		const bool is_default = ZSTR_LEN(prefix_key) == 0;
		const bool has_uri = bound_ns != NULL && bound_ns->href != NULL && *bound_ns->href != '\0';

		if (is_default && !has_uri) {
			/* Exception: "the value of the xmlns attribute for the nearest such element is non-empty" */
			continue;
		}

		zval info;
		object_init_ex(&info, dom_namespace_info_class_entry);
		zend_object *info_obj = Z_OBJ(info);

		/* Slot 0: prefix */
		if (is_default) {
			ZVAL_NULL(OBJ_PROP_NUM(info_obj, 0));
		} else {
			ZVAL_STR_COPY(OBJ_PROP_NUM(info_obj, 0), prefix_key);
		}

		/* Slot 1: namespace URI */
		if (has_uri) {
			ZVAL_STRING(OBJ_PROP_NUM(info_obj, 1), (const char *) bound_ns->href);
		} else {
			ZVAL_NULL(OBJ_PROP_NUM(info_obj, 1));
		}

		/* Slot 2: the element the information was gathered for */
		php_dom_create_object(nodep, OBJ_PROP_NUM(info_obj, 2), intern);

		zend_hash_next_index_insert_new(result, &info);
	} ZEND_HASH_FOREACH_END();

	zend_hash_destroy(&nearest_decls);
}
1936+
1937+
PHP_METHOD(Dom_Element, getInScopeNamespaces)
1938+
{
1939+
zval *id;
1940+
xmlNode *nodep;
1941+
dom_object *intern;
1942+
1943+
ZEND_PARSE_PARAMETERS_NONE();
1944+
1945+
DOM_GET_THIS_OBJ(nodep, id, xmlNodePtr, intern);
1946+
1947+
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(intern);
1948+
1949+
array_init(return_value);
1950+
HashTable *result = Z_ARRVAL_P(return_value);
1951+
1952+
dom_element_get_in_scope_namespace_info(ns_mapper, result, nodep, intern);
1953+
}
1954+
1955+
PHP_METHOD(Dom_Element, getDescendantNamespaces)
1956+
{
1957+
zval *id;
1958+
xmlNode *nodep;
1959+
dom_object *intern;
1960+
1961+
ZEND_PARSE_PARAMETERS_NONE();
1962+
1963+
DOM_GET_THIS_OBJ(nodep, id, xmlNodePtr, intern);
1964+
1965+
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(intern);
1966+
1967+
array_init(return_value);
1968+
HashTable *result = Z_ARRVAL_P(return_value);
1969+
1970+
dom_element_get_in_scope_namespace_info(ns_mapper, result, nodep, intern);
1971+
1972+
xmlNodePtr cur = nodep->children;
1973+
while (cur != NULL) {
1974+
if (cur->type == XML_ELEMENT_NODE) {
1975+
/* TODO: this could be more optimized by updating the same HashTable repeatedly
1976+
* instead of recreating it on every node. */
1977+
dom_element_get_in_scope_namespace_info(ns_mapper, result, cur, intern);
1978+
}
1979+
1980+
cur = php_dom_next_in_tree_order(cur, nodep);
1981+
}
1982+
}
1983+
1984+
PHP_METHOD(Dom_Element, rename)
1985+
{
1986+
zend_string *namespace_uri, *qualified_name;
1987+
ZEND_PARSE_PARAMETERS_START(2, 2)
1988+
Z_PARAM_STR_OR_NULL(namespace_uri)
1989+
Z_PARAM_STR(qualified_name)
1990+
ZEND_PARSE_PARAMETERS_END();
1991+
1992+
zval *id;
1993+
dom_object *intern;
1994+
xmlNodePtr nodep;
1995+
DOM_GET_THIS_OBJ(nodep, id, xmlNodePtr, intern);
1996+
1997+
xmlChar *localname = NULL, *prefix = NULL;
1998+
int errorcode = dom_validate_and_extract(namespace_uri, qualified_name, &localname, &prefix);
1999+
if (UNEXPECTED(errorcode != 0)) {
2000+
php_dom_throw_error(errorcode, /* strict */ true);
2001+
goto cleanup;
2002+
}
2003+
2004+
if (nodep->type == XML_ATTRIBUTE_NODE) {
2005+
/* Check for duplicate attributes. */
2006+
xmlAttrPtr existing = xmlHasNsProp(nodep->parent, localname, namespace_uri && ZSTR_VAL(namespace_uri)[0] != '\0' ? BAD_CAST ZSTR_VAL(namespace_uri) : NULL);
2007+
if (existing != NULL && existing != (xmlAttrPtr) nodep) {
2008+
php_dom_throw_error_with_message(INVALID_MODIFICATION_ERR, "An attribute with the given name in the given namespace already exists", /* strict */ true);
2009+
goto cleanup;
2010+
}
2011+
} else {
2012+
ZEND_ASSERT(nodep->type == XML_ELEMENT_NODE);
2013+
2014+
/* Check for moving to or away from the HTML namespace. */
2015+
bool is_currently_html_ns = php_dom_ns_is_fast(nodep, php_dom_ns_is_html_magic_token);
2016+
bool will_be_html_ns = namespace_uri != NULL && zend_string_equals_literal(namespace_uri, DOM_XHTML_NS_URI);
2017+
if (is_currently_html_ns != will_be_html_ns) {
2018+
if (is_currently_html_ns) {
2019+
php_dom_throw_error_with_message(
2020+
INVALID_MODIFICATION_ERR,
2021+
"It is not possible to move an element out of the HTML namespace because the HTML namespace is tied to the HTMLElement class",
2022+
/* strict */ true
2023+
);
2024+
} else {
2025+
php_dom_throw_error_with_message(
2026+
INVALID_MODIFICATION_ERR,
2027+
"It is not possible to move an element into the HTML namespace because the HTML namespace is tied to the HTMLElement class",
2028+
/* strict */ true
2029+
);
2030+
}
2031+
goto cleanup;
2032+
}
2033+
}
2034+
2035+
php_libxml_invalidate_node_list_cache(intern->document);
2036+
2037+
php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(intern);
2038+
2039+
/* Update namespace uri + prefix by querying the namespace mapper */
2040+
/* prefix can be NULL here, but that is taken care of by the called APIs. */
2041+
nodep->ns = php_dom_libxml_ns_mapper_get_ns_raw_prefix_string(ns_mapper, prefix, xmlStrlen(prefix), namespace_uri);
2042+
2043+
/* Change the local name */
2044+
if (xmlDictOwns(nodep->doc->dict, nodep->name) != 1) {
2045+
xmlFree((xmlChar *) nodep->name);
2046+
}
2047+
const xmlChar *copy = xmlDictLookup(nodep->doc->dict, localname, -1);
2048+
if (copy != NULL) {
2049+
nodep->name = copy;
2050+
} else {
2051+
nodep->name = localname;
2052+
localname = NULL;
2053+
}
2054+
2055+
cleanup:
2056+
xmlFree(localname);
2057+
xmlFree(prefix);
2058+
}
2059+
18452060
#endif

ext/dom/php_dom.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ PHP_DOM_EXPORT zend_class_entry *dom_modern_xpath_class_entry;
8989
#endif
9090
PHP_DOM_EXPORT zend_class_entry *dom_namespace_node_class_entry;
9191
PHP_DOM_EXPORT zend_class_entry *dom_adjacent_position_class_entry;
92+
PHP_DOM_EXPORT zend_class_entry *dom_namespace_info_class_entry;
9293
/* }}} */
9394

9495
static zend_object_handlers dom_object_handlers;
@@ -840,6 +841,8 @@ PHP_MINIT_FUNCTION(dom)
840841
DOM_REGISTER_PROP_HANDLER(&dom_namespace_node_prop_handlers, "parentElement", dom_node_parent_element_read, NULL);
841842
zend_hash_add_new_ptr(&classes, dom_namespace_node_class_entry->name, &dom_namespace_node_prop_handlers);
842843

844+
dom_namespace_info_class_entry = register_class_Dom_NamespaceInfo();
845+
843846
dom_documentfragment_class_entry = register_class_DOMDocumentFragment(dom_node_class_entry, dom_parentnode_class_entry);
844847
dom_documentfragment_class_entry->create_object = dom_objects_new;
845848
dom_documentfragment_class_entry->default_object_handlers = &dom_object_handlers;
@@ -1066,6 +1069,7 @@ PHP_MINIT_FUNCTION(dom)
10661069
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "previousElementSibling", dom_node_previous_element_sibling_read, NULL);
10671070
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "nextElementSibling", dom_node_next_element_sibling_read, NULL);
10681071
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "innerHTML", dom_element_inner_html_read, dom_element_inner_html_write);
1072+
DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "substitutedNodeValue", dom_modern_element_substituted_node_value_read, dom_modern_element_substituted_node_value_write);
10691073
zend_hash_merge(&dom_modern_element_prop_handlers, &dom_modern_node_prop_handlers, NULL, false);
10701074
DOM_OVERWRITE_PROP_HANDLER(&dom_modern_element_prop_handlers, "textContent", dom_node_text_content_read, dom_node_text_content_write);
10711075
zend_hash_add_new_ptr(&classes, dom_modern_element_class_entry->name, &dom_modern_element_prop_handlers);

ext/dom/php_dom.stub.php

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1384,6 +1384,16 @@ public function closest(string $selectors): ?Element {}
13841384
public function matches(string $selectors): bool {}
13851385

13861386
public string $innerHTML;
1387+
1388+
public string $substitutedNodeValue;
1389+
1390+
/** @return list<NamespaceInfo> */
1391+
public function getInScopeNamespaces(): array {}
1392+
1393+
/** @return list<NamespaceInfo> */
1394+
public function getDescendantNamespaces(): array {}
1395+
1396+
public function rename(?string $namespaceURI, string $qualifiedName): void {}
13871397
}
13881398

13891399
class HTMLElement extends Element
@@ -1410,6 +1420,9 @@ class Attr extends Node
14101420

14111421
/** @implementation-alias DOMAttr::isId */
14121422
public function isId(): bool {}
1423+
1424+
/** @implementation-alias Dom\Element::rename */
1425+
public function rename(?string $namespaceURI, string $qualifiedName): void {}
14131426
}
14141427

14151428
class CharacterData extends Node implements ChildNode
@@ -1688,6 +1701,20 @@ public function count(): int {}
16881701
public function getIterator(): \Iterator {}
16891702
}
16901703

1704+
/**
1705+
* Read-only record describing one namespace binding, as returned in the lists
* produced by Dom\Element::getInScopeNamespaces() and
* Dom\Element::getDescendantNamespaces().
*
* The C implementation fills the properties by slot number in declaration
* order (0 = prefix, 1 = namespaceURI, 2 = element), so the order of the
* property declarations below must be kept.
*
* @not-serializable
1706+
* @strict-properties
1707+
*/
1708+
readonly final class NamespaceInfo
1709+
{
1710+
// Namespace prefix; null for the default (unprefixed) namespace.
public ?string $prefix;
1711+
// Namespace URI bound to the prefix; null when the binding has no URI or an empty one.
public ?string $namespaceURI;
1712+
// The element whose in-scope namespaces this entry was collected for.
public Element $element;
1713+
1714+
// Private constructor: userland code cannot create instances with `new`.
/** @implementation-alias Dom\Node::__construct */
1715+
private function __construct() {}
1716+
}
1717+
16911718
#ifdef LIBXML_XPATH_ENABLED
16921719
/** @not-serializable */
16931720
final class XPath

0 commit comments

Comments
 (0)