From 0ff0ee3e7cc0a2fcd64998b7f20e17ae2dcd607b Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Mon, 29 May 2023 16:53:32 +0200
Subject: [PATCH 01/18] Implement iteration cache, item cache and length cache
for node list iteration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The current implementation follows the spec requirement that the list
must be "live". This means that changes in the document must be
reflected in the existing node lists without requiring the user to
refetch the node list.
The consequence is that getting any item, or the length of the list,
always starts searching from the root element of the node list. This
results in O(n) time to get any item or the length. If there's a for
loop over the node list, this means the iterations will take O(n²) time
in total. This causes real-world performance issues with potential for
downtime (see GH-11308 and its references for details).
We fix this by introducing a caching strategy. We cache the last
iterated object in the iterator, the last requested item in the node
list, and the last length computation. To invalidate the cache, we
simply count the number of modifications made to the containing
document. If the modification number does not match what the number was
during caching, we know the document has been modified and the cache is
invalid. If this ever overflows, we saturate the modification number and
don't do any caching anymore. Note that we don't check for overflow on
64-bit systems because it would take hundreds of years to overflow.
Fixes GH-11308.
---
UPGRADING.INTERNALS | 6 ++
ext/dom/document.c | 26 ++++++
ext/dom/dom_iterators.c | 24 +++--
ext/dom/node.c | 12 +++
ext/dom/nodelist.c | 91 +++++++++++++++++--
ext/dom/parentnode.c | 10 ++
ext/dom/php_dom.c | 11 ++-
ext/dom/php_dom.h | 39 ++++++++
...ocument_getElementsByTagName_liveness.phpt | 21 +++++
.../DOMDocument_item_cache_invalidation.phpt | 69 ++++++++++++++
...DOMDocument_length_cache_invalidation.phpt | 34 +++++++
...ocument_liveness_caching_invalidation.phpt | 26 ++++++
...getElementsByTagName_without_document.phpt | 14 +++
ext/libxml/libxml.c | 8 +-
ext/libxml/php_libxml.h | 24 +++++
15 files changed, 394 insertions(+), 21 deletions(-)
create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt
create mode 100644 ext/dom/tests/DOMDocument_item_cache_invalidation.phpt
create mode 100644 ext/dom/tests/DOMDocument_length_cache_invalidation.phpt
create mode 100644 ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt
create mode 100644 ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt
diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index 99b609a115b00..4368eac6ad5a5 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -120,6 +120,12 @@ PHP 8.3 INTERNALS UPGRADE NOTES
- A new function dom_get_doc_props_read_only() is added to gather the document
properties in a read-only way. This function avoids allocation when there are
no document properties changed yet.
+ - The node list returned by DOMNode::getElementsByTagName() and
+ DOMNode::getElementsByTagNameNS() now caches the length and the last requested item.
+ This means that the length and the last requested item are not recalculated
+ when the node list is iterated over multiple times.
+ If you do not use the internal PHP dom APIs to modify the document, you need to
+ manually invalidate the cache using php_dom_invalidate_node_list_cache().
========================
4. OpCode changes
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 13324645e987b..47a9922ba0f4e 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -847,6 +847,8 @@ PHP_METHOD(DOMDocument, importNode)
}
}
+ php_dom_invalidate_node_list_cache(docp);
+
DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern);
}
/* }}} end dom_document_import_node */
@@ -1070,6 +1072,8 @@ PHP_METHOD(DOMDocument, normalizeDocument)
DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
+ php_dom_invalidate_node_list_cache(docp);
+
dom_normalize((xmlNodePtr) docp);
}
/* }}} end dom_document_normalize_document */
@@ -1328,10 +1332,14 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
if (id != NULL) {
intern = Z_DOMOBJ_P(id);
+ size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
+ const php_libxml_doc_ptr *doc_ptr = docp->_private;
+ ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
+ old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
@@ -1348,6 +1356,12 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
+ /* Since iterators should invalidate, we need to start the modification number from the old counter */
+ if (old_modification_nr != 0) {
+ php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
+ doc_ptr->cache_tag.modification_nr = old_modification_nr;
+ php_libxml_invalidate_node_list_cache(doc_ptr);
+ }
RETURN_TRUE;
} else {
@@ -1563,6 +1577,8 @@ PHP_METHOD(DOMDocument, xinclude)
php_dom_remove_xinclude_nodes(root);
}
+ php_dom_invalidate_node_list_cache(docp);
+
if (err) {
RETVAL_LONG(err);
} else {
@@ -1871,10 +1887,14 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
if (id != NULL && instanceof_function(Z_OBJCE_P(id), dom_document_class_entry)) {
intern = Z_DOMOBJ_P(id);
+ size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
+ const php_libxml_doc_ptr *doc_ptr = docp->_private;
+ ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
+ old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
@@ -1891,6 +1911,12 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
+ /* Since iterators should invalidate, we need to start the modification number from the old counter */
+ if (old_modification_nr != 0) {
+ php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
+ doc_ptr->cache_tag.modification_nr = old_modification_nr;
+ php_libxml_invalidate_node_list_cache(doc_ptr);
+ }
RETURN_TRUE;
} else {
diff --git a/ext/dom/dom_iterators.c b/ext/dom/dom_iterators.c
index 72c97104db04d..48323332d7646 100644
--- a/ext/dom/dom_iterators.c
+++ b/ext/dom/dom_iterators.c
@@ -179,7 +179,7 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
dom_object *intern;
dom_object *nnmap;
dom_nnodemap_object *objmap;
- int previndex=0;
+ int previndex;
HashTable *nodeht;
zval *entry;
bool do_curobj_undef = 1;
@@ -205,20 +205,27 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
do_curobj_undef = 0;
}
} else {
- curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
if (objmap->nodetype == XML_ATTRIBUTE_NODE ||
objmap->nodetype == XML_ELEMENT_NODE) {
+ curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
curnode = curnode->next;
} else {
/* Nav the tree evey time as this is LIVE */
basenode = dom_object_get_node(objmap->baseobj);
- if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
- basenode->type == XML_HTML_DOCUMENT_NODE)) {
- basenode = xmlDocGetRootElement((xmlDoc *) basenode);
- } else if (basenode) {
- basenode = basenode->children;
+ if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) {
+ if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
+ basenode->type == XML_HTML_DOCUMENT_NODE)) {
+ basenode = xmlDocGetRootElement((xmlDoc *) basenode);
+ } else if (basenode) {
+ basenode = basenode->children;
+ } else {
+ goto err;
+ }
+ php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode);
+ previndex = 0;
} else {
- goto err;
+ previndex = iter->index - 1;
+ basenode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
@@ -270,6 +277,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
}
iterator = emalloc(sizeof(php_dom_iterator));
zend_iterator_init(&iterator->intern);
+ iterator->cache_tag.modification_nr = 0;
ZVAL_OBJ_COPY(&iterator->intern.data, Z_OBJ_P(object));
iterator->intern.funcs = &php_dom_iterator_funcs;
diff --git a/ext/dom/node.c b/ext/dom/node.c
index fdb51bf51092f..7237a46b56ebf 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -769,6 +769,8 @@ int dom_node_text_content_write(dom_object *obj, zval *newval)
return FAILURE;
}
+ php_dom_invalidate_node_list_cache(nodep->doc);
+
const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str);
int type = nodep->type;
@@ -897,6 +899,8 @@ PHP_METHOD(DOMNode, insertBefore)
php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL);
}
+ php_dom_invalidate_node_list_cache(parentp->doc);
+
if (ref != NULL) {
DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj);
if (refp->parent != parentp) {
@@ -1086,6 +1090,7 @@ PHP_METHOD(DOMNode, replaceChild)
nodep->doc->intSubset = (xmlDtd *) newchild;
}
}
+ php_dom_invalidate_node_list_cache(nodep->doc);
DOM_RET_OBJ(oldchild, &ret, intern);
}
/* }}} end dom_node_replace_child */
@@ -1127,6 +1132,7 @@ PHP_METHOD(DOMNode, removeChild)
}
xmlUnlinkNode(child);
+ php_dom_invalidate_node_list_cache(nodep->doc);
DOM_RET_OBJ(child, &ret, intern);
}
/* }}} end dom_node_remove_child */
@@ -1230,6 +1236,8 @@ PHP_METHOD(DOMNode, appendChild)
dom_reconcile_ns(nodep->doc, new_child);
+ php_dom_invalidate_node_list_cache(nodep->doc);
+
DOM_RET_OBJ(new_child, &ret, intern);
}
/* }}} end dom_node_append_child */
@@ -1339,6 +1347,8 @@ PHP_METHOD(DOMNode, normalize)
DOM_GET_OBJ(nodep, id, xmlNodePtr, intern);
+ php_dom_invalidate_node_list_cache(nodep->doc);
+
dom_normalize(nodep);
}
@@ -1571,6 +1581,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
RETURN_THROWS();
}
+ php_dom_invalidate_node_list_cache(docp);
+
if (xpath_array == NULL) {
if (nodep->type != XML_DOCUMENT_NODE) {
ctxp = xmlXPathNewContext(docp);
diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c
index b03ebe1acd90a..81da629fed814 100644
--- a/ext/dom/nodelist.c
+++ b/ext/dom/nodelist.c
@@ -31,6 +31,23 @@
* Since:
*/
+static zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap)
+{
+ if (objmap->cached_obj) {
+ if (GC_DELREF(&objmap->cached_obj->std) == 0) {
+ zend_objects_store_del(&objmap->cached_obj->std);
+ }
+ objmap->cached_obj = NULL;
+ objmap->cached_obj_index = 0;
+ }
+}
+
+static zend_always_inline void reset_objmap_cache(dom_nnodemap_object *objmap)
+{
+ objmap_cache_release_cached_obj(objmap);
+ objmap->cached_length = -1;
+}
+
static int get_nodelist_length(dom_object *obj)
{
dom_nnodemap_object *objmap = (dom_nnodemap_object *) obj->ptr;
@@ -52,6 +69,17 @@ static int get_nodelist_length(dom_object *obj)
return 0;
}
+ if (!php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
+ if (objmap->cached_length >= 0) {
+ return objmap->cached_length;
+ }
+ /* Only the length is out-of-date, the cache tag is still valid.
+ * Therefore, only overwrite the length and keep the currently cached object. */
+ } else {
+ php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, nodep);
+ reset_objmap_cache(objmap);
+ }
+
int count = 0;
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
xmlNodePtr curnode = nodep->children;
@@ -72,6 +100,8 @@ static int get_nodelist_length(dom_object *obj)
nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1);
}
+ objmap->cached_length = count;
+
return count;
}
@@ -113,11 +143,12 @@ PHP_METHOD(DOMNodeList, item)
zval *id;
zend_long index;
int ret;
+ bool cache_itemnode = false;
dom_object *intern;
xmlNodePtr itemnode = NULL;
dom_nnodemap_object *objmap;
- xmlNodePtr nodep, curnode;
+ xmlNodePtr nodep;
int count = 0;
id = ZEND_THIS;
@@ -147,21 +178,46 @@ PHP_METHOD(DOMNodeList, item)
} else if (objmap->baseobj) {
nodep = dom_object_get_node(objmap->baseobj);
if (nodep) {
+ /* For now we're only able to use cache for forward search.
+ * TODO: in the future we could extend the logic of the node list such that backwards searches
+ * are also possible. */
+ bool restart = true;
+ int relative_index = index;
+ if (index >= objmap->cached_obj_index && objmap->cached_obj && !php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
+ nodep = dom_object_get_node(objmap->cached_obj);
+ /* The node cannot be NULL if the cache is valid. If it is NULL, then it means we
+ * forgot an invalidation somewhere. Take the defensive programming approach and invalidate
+ * it here if it's NULL (except in debug mode where we would want to catch this). */
+ if (UNEXPECTED(nodep == NULL)) {
+#if ZEND_DEBUG
+ ZEND_UNREACHABLE();
+#endif
+ reset_objmap_cache(objmap);
+ } else {
+ restart = false;
+ relative_index -= objmap->cached_obj_index;
+ }
+ }
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
- curnode = nodep->children;
- while (count < index && curnode != NULL) {
+ if (restart) {
+ nodep = nodep->children;
+ }
+ while (count < relative_index && nodep != NULL) {
count++;
- curnode = curnode->next;
+ nodep = nodep->next;
}
- itemnode = curnode;
+ itemnode = nodep;
} else {
- if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
- nodep = xmlDocGetRootElement((xmlDoc *) nodep);
- } else {
- nodep = nodep->children;
+ if (restart) {
+ if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
+ nodep = xmlDocGetRootElement((xmlDoc*) nodep);
+ } else {
+ nodep = nodep->children;
+ }
}
- itemnode = dom_get_elements_by_tag_name_ns_raw(nodep, (char *) objmap->ns, (char *) objmap->local, &count, index);
+ itemnode = dom_get_elements_by_tag_name_ns_raw(nodep, (char *) objmap->ns, (char *) objmap->local, &count, relative_index);
}
+ cache_itemnode = true;
}
}
}
@@ -169,6 +225,21 @@ PHP_METHOD(DOMNodeList, item)
if (itemnode) {
DOM_RET_OBJ(itemnode, &ret, objmap->baseobj);
+ if (cache_itemnode) {
+ /* Hold additional reference for the cache, must happen before releasing the cache
+ * because we might be the last reference holder. */
+ dom_object *cached_obj = Z_DOMOBJ_P(return_value);
+ GC_ADDREF(&cached_obj->std);
+ /* If the tag is stale, all cached data is useless. Otherwise only the cached object is useless. */
+ if (php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
+ php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, nodep);
+ reset_objmap_cache(objmap);
+ } else {
+ objmap_cache_release_cached_obj(objmap);
+ }
+ objmap->cached_obj_index = index;
+ objmap->cached_obj = cached_obj;
+ }
return;
}
}
diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c
index 4d0fffeb9e058..d7b9f3c2792f2 100644
--- a/ext/dom/parentnode.c
+++ b/ext/dom/parentnode.c
@@ -280,6 +280,8 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc)
return;
}
+ php_dom_invalidate_node_list_cache(parentNode->doc);
+
xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
if (fragment == NULL) {
@@ -322,6 +324,8 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc)
return;
}
+ php_dom_invalidate_node_list_cache(parentNode->doc);
+
xmlNodePtr newchild, nextsib;
xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -402,6 +406,8 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc)
doc = prevsib->doc;
+ php_dom_invalidate_node_list_cache(doc);
+
/* Spec step 4: convert nodes into fragment */
fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -451,6 +457,8 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc)
doc = nextsib->doc;
+ php_dom_invalidate_node_list_cache(doc);
+
/* Spec step 4: convert nodes into fragment */
fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -506,6 +514,8 @@ void dom_child_node_remove(dom_object *context)
return;
}
+ php_dom_invalidate_node_list_cache(context->document->ptr);
+
while (children) {
if (children == child) {
xmlUnlinkNode(child);
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index e02b0973291c5..926c3a60c09ff 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1010,6 +1010,9 @@ void dom_nnodemap_objects_free_storage(zend_object *object) /* {{{ */
dom_nnodemap_object *objmap = (dom_nnodemap_object *)intern->ptr;
if (objmap) {
+ if (objmap->cached_obj && GC_DELREF(&objmap->cached_obj->std) == 0) {
+ zend_objects_store_del(&objmap->cached_obj->std);
+ }
if (objmap->local) {
xmlFree(objmap->local);
}
@@ -1043,6 +1046,10 @@ zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type) /* {{{ */
objmap->ht = NULL;
objmap->local = NULL;
objmap->ns = NULL;
+ objmap->cache_tag.modification_nr = 0;
+ objmap->cached_length = -1;
+ objmap->cached_obj = NULL;
+ objmap->cached_obj_index = 0;
return &intern->std;
}
@@ -1223,6 +1230,7 @@ bool dom_has_feature(zend_string *feature, zend_string *version)
xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */
{
xmlNodePtr ret = NULL;
+ bool local_match_any = local[0] == '*' && local[1] == '\0';
/* Note: The spec says that ns == '' must be transformed to ns == NULL. In other words, they are equivalent.
* PHP however does not do this and internally uses the empty string everywhere when the user provides ns == NULL.
@@ -1231,7 +1239,7 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l
while (nodep != NULL && (*cur <= index || index == -1)) {
if (nodep->type == XML_ELEMENT_NODE) {
- if (xmlStrEqual(nodep->name, (xmlChar *)local) || xmlStrEqual((xmlChar *)"*", (xmlChar *)local)) {
+ if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) {
if (ns_match_any || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
if (*cur == index) {
ret = nodep;
@@ -1249,7 +1257,6 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l
}
return ret;
}
-/* }}} */
/* }}} end dom_element_get_elements_by_tag_name_ns_raw */
static inline bool is_empty_node(xmlNodePtr nodep)
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index a7ae09384cfdc..e14f26e101017 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -82,15 +82,20 @@ typedef struct _dom_nnodemap_object {
dom_object *baseobj;
zval baseobj_zv;
int nodetype;
+ int cached_length;
xmlHashTable *ht;
xmlChar *local;
xmlChar *ns;
+ php_libxml_cache_tag cache_tag;
+ dom_object *cached_obj;
+ int cached_obj_index;
} dom_nnodemap_object;
typedef struct {
zend_object_iterator intern;
zval curobj;
HashPosition pos;
+ php_libxml_cache_tag cache_tag;
} php_dom_iterator;
#include "domexception.h"
@@ -153,6 +158,40 @@ void dom_child_node_remove(dom_object *context);
#define DOM_NODELIST 0
#define DOM_NAMEDNODEMAP 1
+static zend_always_inline void php_dom_invalidate_node_list_cache(xmlDocPtr docp)
+{
+ if (docp && docp->_private) { /* not all elements have an associated document (e.g. invalid hierarchy) */
+ php_libxml_invalidate_node_list_cache(docp->_private);
+ }
+}
+
+static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr)
+{
+ ZEND_ASSERT(cache_tag != NULL);
+ ZEND_ASSERT(doc_ptr != NULL);
+ /* See overflow comment in php_libxml_invalidate_node_list_cache(). */
+#if SIZEOF_SIZE_T == 8
+ return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr;
+#else
+ return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr || UNEXPECTED(doc_ptr->cache_tag.modification_nr == SIZE_MAX);
+#endif
+}
+
+static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
+{
+ ZEND_ASSERT(node != NULL);
+ return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private);
+}
+
+static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
+{
+ ZEND_ASSERT(cache_tag != NULL);
+ if (node->doc && node->doc->_private) {
+ const php_libxml_doc_ptr* doc_ptr = node->doc->_private;
+ cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr;
+ }
+}
+
PHP_MINIT_FUNCTION(dom);
PHP_MSHUTDOWN_FUNCTION(dom);
PHP_MINFO_FUNCTION(dom);
diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt
new file mode 100644
index 0000000000000..2bd02bc87034b
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt
@@ -0,0 +1,21 @@
+--TEST--
+DOMDocument::getElementsByTagName() is live
+--EXTENSIONS--
+dom
+--FILE--
+loadXML( '' );
+$root = $doc->documentElement;
+
+$i = 0;
+
+foreach ($doc->getElementsByTagName('e') as $node) {
+ print $node->getAttribute('i') . ' ';
+ if ($i++ % 2 == 0)
+ $root->removeChild($node);
+}
+print "\n";
+?>
+--EXPECT--
+1 3 4 6 7 9 10 12 13 15
diff --git a/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt b/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt
new file mode 100644
index 0000000000000..dad532b8167fe
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt
@@ -0,0 +1,69 @@
+--TEST--
+DOMDocument node list item cache invalidation
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML('
hello
world
');
+
+$elements = $doc->getElementsByTagName('p');
+$elements->item(0); // Activate item cache
+$doc->loadHTML('A
B
C
');
+var_dump($elements);
+var_dump($elements->item(0)->textContent); // First lookup
+var_dump($elements->item(2)->textContent); // Uses cache
+var_dump($elements->item(1)->textContent); // Does not use cache
+
+echo "-- Remove cached item test --\n";
+
+$doc = new DOMDocument();
+$doc->loadHTML('hello
world
!
');
+
+$elements = $doc->getElementsByTagName('p');
+$item = $elements->item(0); // Activate item cache
+var_dump($item->textContent);
+$item->remove();
+// Now element 0 means "world", and 1 means "!"
+unset($item);
+$item = $elements->item(1);
+var_dump($item->textContent);
+
+echo "-- Removal of cached item in loop test --\n";
+
+$doc = new DOMDocument;
+$doc->loadXML( '' );
+$root = $doc->documentElement;
+
+$i = 0;
+$elements = $root->getElementsByTagName('e');
+for ($i = 0; $i < 11; $i++) {
+ $node = $elements->item($i);
+ print $node->getAttribute('i') . ' ';
+ if ($i++ % 2 == 0)
+ $root->removeChild( $node );
+}
+print "\n";
+
+?>
+--EXPECTF--
+-- Switch document test --
+object(DOMNodeList)#2 (1) {
+ ["length"]=>
+ int(3)
+}
+string(1) "A"
+string(1) "C"
+string(1) "B"
+-- Remove cached item test --
+string(5) "hello"
+string(1) "!"
+-- Removal of cached item in loop test --
+1 4 7 10 13
+Fatal error: Uncaught Error: Call to a member function getAttribute() on null in %s:%d
+Stack trace:
+#0 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt b/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt
new file mode 100644
index 0000000000000..7a3633894a381
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt
@@ -0,0 +1,34 @@
+--TEST--
+DOMDocument node list length cache invalidation
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML('hello
world
!
');
+
+$elements = $doc->getElementsByTagName('p');
+$item = $elements->item(0); // Activate item cache
+var_dump($elements->length); // Length not cached yet, should still compute
+$item->remove();
+// Now element 0 means "world", and 1 means "!"
+unset($item);
+var_dump($elements->length);
+$item = $elements->item(1);
+var_dump($item->textContent);
+$item = $elements->item(1);
+var_dump($item->textContent);
+$item = $elements->item(0);
+var_dump($item->textContent);
+$item = $elements->item(1);
+var_dump($item->textContent);
+
+?>
+--EXPECT--
+int(3)
+int(2)
+string(1) "!"
+string(1) "!"
+string(5) "world"
+string(1) "!"
diff --git a/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt
new file mode 100644
index 0000000000000..eaa600a666c55
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt
@@ -0,0 +1,26 @@
+--TEST--
+DOMDocument liveness caching invalidation by textContent
+--EXTENSIONS--
+dom
+--FILE--
+createElement( 'root' );
+$doc->appendChild( $root );
+for ( $i = 0; $i < 5; $i++ ) {
+ $root->appendChild( $doc->createElement( 'e' ) );
+}
+
+$i = 0;
+
+foreach ( $doc->getElementsByTagName( 'e' ) as $node ) {
+ if ($i++ == 2) {
+ $root->textContent = 'overwrite';
+ }
+ var_dump($node->tagName);
+}
+?>
+--EXPECTF--
+string(1) "e"
+string(1) "e"
+string(1) "e"
diff --git a/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt
new file mode 100644
index 0000000000000..cf6acf46162a3
--- /dev/null
+++ b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt
@@ -0,0 +1,14 @@
+--TEST--
+Node list cache should not break on DOMElement::getElementsByTagName() without document
+--EXTENSIONS--
+dom
+--FILE--
+getElementsByTagName('b') as $x);
+
+?>
+Done
+--EXPECT--
+Done
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 71ed3f911cca7..5af3443069aba 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -1163,8 +1163,14 @@ PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object,
object->node->_private = private_data;
}
} else {
+ if (UNEXPECTED(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)) {
+ php_libxml_doc_ptr *doc_ptr = emalloc(sizeof(php_libxml_doc_ptr));
+ doc_ptr->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */
+ object->node = (php_libxml_node_ptr *) doc_ptr; /* downcast */
+ } else {
+ object->node = emalloc(sizeof(php_libxml_node_ptr));
+ }
ret_refcount = 1;
- object->node = emalloc(sizeof(php_libxml_node_ptr));
object->node->node = node;
object->node->refcount = 1;
object->node->_private = private_data;
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index de9b49d2ce3b6..785fd64ed2403 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -69,6 +69,16 @@ typedef struct _php_libxml_node_ptr {
void *_private;
} php_libxml_node_ptr;
+typedef struct {
+ size_t modification_nr;
+} php_libxml_cache_tag;
+
+/* extends php_libxml_node_ptr */
+typedef struct {
+ php_libxml_node_ptr node_ptr;
+ php_libxml_cache_tag cache_tag;
+} php_libxml_doc_ptr;
+
typedef struct _php_libxml_node_object {
php_libxml_node_ptr *node;
php_libxml_ref_obj *document;
@@ -81,6 +91,20 @@ static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object *
return (php_libxml_node_object *)((char*)(obj) - obj->handlers->offset);
}
+static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_doc_ptr *doc_ptr)
+{
+#if SIZEOF_SIZE_T == 8
+ /* If one operation happens every nanosecond, then it would still require 584 years to overflow
+ * the counter. So we'll just assume this never happens. */
+ doc_ptr->cache_tag.modification_nr++;
+#else
+ size_t new_modification_nr = doc_ptr->cache_tag.modification_nr + 1;
+ if (EXPECTED(new_modification_nr > 0)) { /* unsigned overflow; checking after addition results in one less instruction */
+ doc_ptr->cache_tag.modification_nr = new_modification_nr;
+ }
+#endif
+}
+
#define Z_LIBXML_NODE_P(zv) php_libxml_node_fetch_object(Z_OBJ_P((zv)))
typedef void * (*php_libxml_export_node) (zval *object);
From 02c07618958968bafeb62373d272c4587f6ab071 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Mon, 29 May 2023 17:11:18 +0200
Subject: [PATCH 02/18] Use match any for ns too (note: contains bugfix for
which I'll create PR)
---
ext/dom/php_dom.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 926c3a60c09ff..1fbf6bb3c0d9f 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1240,7 +1240,7 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l
while (nodep != NULL && (*cur <= index || index == -1)) {
if (nodep->type == XML_ELEMENT_NODE) {
if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) {
- if (ns_match_any || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
+ if (ns_match_any || (ns[0] == '\0' && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
if (*cur == index) {
ret = nodep;
break;
From ff807d6997511f1e255ee4bb20721845a26ba1e9 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Mon, 29 May 2023 17:12:19 +0200
Subject: [PATCH 03/18] Update comment
---
ext/dom/dom_iterators.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ext/dom/dom_iterators.c b/ext/dom/dom_iterators.c
index 48323332d7646..3d1fe629c8cf8 100644
--- a/ext/dom/dom_iterators.c
+++ b/ext/dom/dom_iterators.c
@@ -210,7 +210,8 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
curnode = curnode->next;
} else {
- /* Nav the tree evey time as this is LIVE */
+ /* The collection is live, we nav the tree from the base object if we cannot
+ * use the cache to restart from the last point. */
basenode = dom_object_get_node(objmap->baseobj);
if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) {
if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
From a75c85a59b9f2aa2abfa41544feb8f650a30781c Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Mon, 29 May 2023 23:39:12 +0200
Subject: [PATCH 04/18] Comments
---
ext/dom/nodelist.c | 7 ++++-
...ocument_getElementsByTagName_liveness.phpt | 26 +++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c
index 81da629fed814..021637c5e5cc5 100644
--- a/ext/dom/nodelist.c
+++ b/ext/dom/nodelist.c
@@ -34,6 +34,7 @@
static zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap)
{
if (objmap->cached_obj) {
+ /* Since the DOM is a tree there can be no cycles. */
if (GC_DELREF(&objmap->cached_obj->std) == 0) {
zend_objects_store_del(&objmap->cached_obj->std);
}
@@ -227,7 +228,11 @@ PHP_METHOD(DOMNodeList, item)
DOM_RET_OBJ(itemnode, &ret, objmap->baseobj);
if (cache_itemnode) {
/* Hold additional reference for the cache, must happen before releasing the cache
- * because we might be the last reference holder. */
+ * because we might be the last reference holder.
+ * Instead of storing and copying zvals, we store the object pointer directly.
+ * This saves us some bytes because a pointer is smaller than a zval.
+ * This also means we have to manually refcount the objects here, and remove the reference count
+ * in reset_objmap_cache() and the destructor. */
dom_object *cached_obj = Z_DOMOBJ_P(return_value);
GC_ADDREF(&cached_obj->std);
/* If the tag is stale, all cached data is useless. Otherwise only the cached object is useless. */
diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt
index 2bd02bc87034b..2b4622d10d389 100644
--- a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt
+++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt
@@ -10,6 +10,32 @@ $root = $doc->documentElement;
$i = 0;
+/* Note that the list is live. The explanation for the output is as follows:
+ Before the loop we have the following (writing only the attributes):
+ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+
+ Now the loop starts, the current element is marked with a V. $i == 0:
+ V
+ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ 1 gets printed. $i == 0, which is even, so 1 gets removed, which results in:
+ V
+ 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ Note that everything shifted to the left.
+ Because the list is live, the current element pointer still refers to the first index, which now corresponds to element with attribute 2.
+ Now the foreach body ends, which means we go to the next element, which is now 3 instead of 2.
+ V
+ 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ 3 gets printed. $i == 1, which is odd, so nothing happens and we move on to the next element:
+ V
+ 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ 4 gets printed. $i == 2, which is even, so 4 gets removed, which results in:
+ V
+ 2 3 5 6 7 8 9 10 11 12 13 14 15
+ Note again everything shifted to the left.
+ Now the foreach body ends, which means we go to the next element, which is now 6 instead of 5.
+ V
+ 2 3 5 6 7 8 9 10 11 12 13 14 15
+ 6 gets printed, etc... */
foreach ($doc->getElementsByTagName('e') as $node) {
print $node->getAttribute('i') . ' ';
if ($i++ % 2 == 0)
From dccd29c03e71f6f1952ab036a6a931df5a0d1df3 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 31 May 2023 19:44:39 +0200
Subject: [PATCH 05/18] Fix staleness check
---
ext/dom/nodelist.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c
index 021637c5e5cc5..11cab96e41a1a 100644
--- a/ext/dom/nodelist.c
+++ b/ext/dom/nodelist.c
@@ -185,11 +185,12 @@ PHP_METHOD(DOMNodeList, item)
bool restart = true;
int relative_index = index;
if (index >= objmap->cached_obj_index && objmap->cached_obj && !php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
- nodep = dom_object_get_node(objmap->cached_obj);
+ xmlNodePtr cached_obj_xml_node = dom_object_get_node(objmap->cached_obj);
+
/* The node cannot be NULL if the cache is valid. If it is NULL, then it means we
* forgot an invalidation somewhere. Take the defensive programming approach and invalidate
* it here if it's NULL (except in debug mode where we would want to catch this). */
- if (UNEXPECTED(nodep == NULL)) {
+ if (UNEXPECTED(cached_obj_xml_node == NULL)) {
#if ZEND_DEBUG
ZEND_UNREACHABLE();
#endif
@@ -197,6 +198,7 @@ PHP_METHOD(DOMNodeList, item)
} else {
restart = false;
relative_index -= objmap->cached_obj_index;
+ nodep = cached_obj_xml_node;
}
}
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
From 737f8be45a0162763fddfa59d416c6ec82cce5e6 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 31 May 2023 20:01:38 +0200
Subject: [PATCH 06/18] Fix SimpleXML invalidations
Test was Co-authored-by: tstarling
---
...tElementsByTagName_liveness_simplexml.phpt | 28 +++++++++++++++++++
ext/simplexml/simplexml.c | 16 +++++++++++
2 files changed, 44 insertions(+)
create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt
diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt
new file mode 100644
index 0000000000000..d1eda9a2271f3
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt
@@ -0,0 +1,28 @@
+--TEST--
+DOMDocument::getElementsByTagName() liveness with simplexml_import_dom
+--EXTENSIONS--
+dom
+--FILE--
+loadXML( '' );
+$list = $doc->getElementsByTagName('e');
+print $list->item(5)->getAttribute('i')."\n";
+echo "before import\n";
+$s = simplexml_import_dom($doc->documentElement);
+echo "after import\n";
+
+unset($s->e[5]);
+print $list->item(5)->getAttribute('i')."\n";
+
+unset($s->e[5]);
+print $list->item(5)->getAttribute('i')."\n";
+
+?>
+--EXPECT--
+6
+before import
+after import
+7
+8
diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c
index d3f2865e12036..50ff0f75b580a 100644
--- a/ext/simplexml/simplexml.c
+++ b/ext/simplexml/simplexml.c
@@ -43,6 +43,16 @@ PHP_SXE_API zend_class_entry *sxe_get_element_class_entry(void) /* {{{ */
}
/* }}} */
+/* Similar to php_dom_invalidate_node_list_cache(), but SimpleXML shouldn't depend on ext/dom. */
+static zend_always_inline void php_sxe_invalidate_node_list_cache(xmlNodePtr node)
+{
+ ZEND_ASSERT(node != NULL);
+ xmlDocPtr docp = node->doc;
+ if (docp && docp->_private) {
+ php_libxml_invalidate_node_list_cache(docp->_private);
+ }
+}
+
static php_sxe_object* php_sxe_object_new(zend_class_entry *ce, zend_function *fptr_count);
static xmlNodePtr php_sxe_reset_iterator(php_sxe_object *sxe, int use_data);
static xmlNodePtr php_sxe_iterator_fetch(php_sxe_object *sxe, xmlNodePtr node, int use_data);
@@ -442,6 +452,8 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value,
GET_NODE(sxe, node);
+ php_sxe_invalidate_node_list_cache(node);
+
if (sxe->iter.type == SXE_ITER_ATTRLIST) {
attribs = 1;
elements = 0;
@@ -813,6 +825,8 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements
GET_NODE(sxe, node);
+ php_sxe_invalidate_node_list_cache(node);
+
if (Z_TYPE_P(member) == IS_LONG) {
if (sxe->iter.type != SXE_ITER_ATTRLIST) {
attribs = 0;
@@ -1686,6 +1700,8 @@ PHP_METHOD(SimpleXMLElement, addChild)
sxe = Z_SXEOBJ_P(ZEND_THIS);
GET_NODE(sxe, node);
+ php_sxe_invalidate_node_list_cache(node);
+
if (sxe->iter.type == SXE_ITER_ATTRLIST) {
php_error_docref(NULL, E_WARNING, "Cannot add element to attributes");
return;
From 3e1eadf628c5738d6ad01fafac9e9d7b2ab24856 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 31 May 2023 20:08:34 +0200
Subject: [PATCH 07/18] Move common code, update internals upgrading note with
the new functions
---
UPGRADING.INTERNALS | 10 +++++++++-
ext/dom/document.c | 6 +++---
ext/dom/node.c | 14 +++++++-------
ext/dom/parentnode.c | 10 +++++-----
ext/dom/php_dom.h | 7 -------
ext/libxml/php_libxml.h | 7 +++++++
ext/simplexml/simplexml.c | 16 +++-------------
7 files changed, 34 insertions(+), 36 deletions(-)
diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index 4368eac6ad5a5..bf489c3258ed7 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -125,7 +125,15 @@ PHP 8.3 INTERNALS UPGRADE NOTES
This means that the length and the last requested item are not recalculated
when the node list is iterated over multiple times.
If you do not use the internal PHP dom APIs to modify the document, you need to
- manually invalidate the cache using php_dom_invalidate_node_list_cache().
+ manually invalidate the cache using php_libxml_invalidate_node_list_cache_from_doc().
+ Furthermore, the following internal APIs were added to handle the cache:
+ . php_dom_is_cache_tag_stale_from_doc_ptr()
+ . php_dom_is_cache_tag_stale_from_node()
+ . php_dom_mark_cache_tag_up_to_date_from_node()
+
+ g. ext/libxml
+ - Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and
+ php_libxml_invalidate_node_list_cache() were added to invalidate the cache of a node list.
========================
4. OpCode changes
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 47a9922ba0f4e..956bffa700927 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -847,7 +847,7 @@ PHP_METHOD(DOMDocument, importNode)
}
}
- php_dom_invalidate_node_list_cache(docp);
+ php_libxml_invalidate_node_list_cache_from_doc(docp);
DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern);
}
@@ -1072,7 +1072,7 @@ PHP_METHOD(DOMDocument, normalizeDocument)
DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
- php_dom_invalidate_node_list_cache(docp);
+ php_libxml_invalidate_node_list_cache_from_doc(docp);
dom_normalize((xmlNodePtr) docp);
}
@@ -1577,7 +1577,7 @@ PHP_METHOD(DOMDocument, xinclude)
php_dom_remove_xinclude_nodes(root);
}
- php_dom_invalidate_node_list_cache(docp);
+ php_libxml_invalidate_node_list_cache_from_doc(docp);
if (err) {
RETVAL_LONG(err);
diff --git a/ext/dom/node.c b/ext/dom/node.c
index 7237a46b56ebf..8cb95fd463d7e 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -769,7 +769,7 @@ int dom_node_text_content_write(dom_object *obj, zval *newval)
return FAILURE;
}
- php_dom_invalidate_node_list_cache(nodep->doc);
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str);
int type = nodep->type;
@@ -899,7 +899,7 @@ PHP_METHOD(DOMNode, insertBefore)
php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL);
}
- php_dom_invalidate_node_list_cache(parentp->doc);
+ php_libxml_invalidate_node_list_cache_from_doc(parentp->doc);
if (ref != NULL) {
DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj);
@@ -1090,7 +1090,7 @@ PHP_METHOD(DOMNode, replaceChild)
nodep->doc->intSubset = (xmlDtd *) newchild;
}
}
- php_dom_invalidate_node_list_cache(nodep->doc);
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(oldchild, &ret, intern);
}
/* }}} end dom_node_replace_child */
@@ -1132,7 +1132,7 @@ PHP_METHOD(DOMNode, removeChild)
}
xmlUnlinkNode(child);
- php_dom_invalidate_node_list_cache(nodep->doc);
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(child, &ret, intern);
}
/* }}} end dom_node_remove_child */
@@ -1236,7 +1236,7 @@ PHP_METHOD(DOMNode, appendChild)
dom_reconcile_ns(nodep->doc, new_child);
- php_dom_invalidate_node_list_cache(nodep->doc);
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(new_child, &ret, intern);
}
@@ -1347,7 +1347,7 @@ PHP_METHOD(DOMNode, normalize)
DOM_GET_OBJ(nodep, id, xmlNodePtr, intern);
- php_dom_invalidate_node_list_cache(nodep->doc);
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
dom_normalize(nodep);
@@ -1581,7 +1581,7 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
RETURN_THROWS();
}
- php_dom_invalidate_node_list_cache(docp);
+ php_libxml_invalidate_node_list_cache_from_doc(docp);
if (xpath_array == NULL) {
if (nodep->type != XML_DOCUMENT_NODE) {
diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c
index d7b9f3c2792f2..36cd6104f38a4 100644
--- a/ext/dom/parentnode.c
+++ b/ext/dom/parentnode.c
@@ -280,7 +280,7 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc)
return;
}
- php_dom_invalidate_node_list_cache(parentNode->doc);
+ php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc);
xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -324,7 +324,7 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc)
return;
}
- php_dom_invalidate_node_list_cache(parentNode->doc);
+ php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc);
xmlNodePtr newchild, nextsib;
xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -406,7 +406,7 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc)
doc = prevsib->doc;
- php_dom_invalidate_node_list_cache(doc);
+ php_libxml_invalidate_node_list_cache_from_doc(doc);
/* Spec step 4: convert nodes into fragment */
fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -457,7 +457,7 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc)
doc = nextsib->doc;
- php_dom_invalidate_node_list_cache(doc);
+ php_libxml_invalidate_node_list_cache_from_doc(doc);
/* Spec step 4: convert nodes into fragment */
fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -514,7 +514,7 @@ void dom_child_node_remove(dom_object *context)
return;
}
- php_dom_invalidate_node_list_cache(context->document->ptr);
+ php_libxml_invalidate_node_list_cache_from_doc(context->document->ptr);
while (children) {
if (children == child) {
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index e14f26e101017..fd9cfde059c67 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -158,13 +158,6 @@ void dom_child_node_remove(dom_object *context);
#define DOM_NODELIST 0
#define DOM_NAMEDNODEMAP 1
-static zend_always_inline void php_dom_invalidate_node_list_cache(xmlDocPtr docp)
-{
- if (docp && docp->_private) { /* not all elements have an associated document (e.g. invalid hierarchy) */
- php_libxml_invalidate_node_list_cache(docp->_private);
- }
-}
-
static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr)
{
ZEND_ASSERT(cache_tag != NULL);
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index 785fd64ed2403..a23ff6ee57c13 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -105,6 +105,13 @@ static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_
#endif
}
+static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp)
+{
+ if (docp && docp->_private) { /* docp is NULL for detached nodes */
+ php_libxml_invalidate_node_list_cache(docp->_private);
+ }
+}
+
#define Z_LIBXML_NODE_P(zv) php_libxml_node_fetch_object(Z_OBJ_P((zv)))
typedef void * (*php_libxml_export_node) (zval *object);
diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c
index 50ff0f75b580a..e0340b2e3a68a 100644
--- a/ext/simplexml/simplexml.c
+++ b/ext/simplexml/simplexml.c
@@ -43,16 +43,6 @@ PHP_SXE_API zend_class_entry *sxe_get_element_class_entry(void) /* {{{ */
}
/* }}} */
-/* Similar to php_dom_invalidate_node_list_cache(), but SimpleXML shouldn't depend on ext/dom. */
-static zend_always_inline void php_sxe_invalidate_node_list_cache(xmlNodePtr node)
-{
- ZEND_ASSERT(node != NULL);
- xmlDocPtr docp = node->doc;
- if (docp && docp->_private) {
- php_libxml_invalidate_node_list_cache(docp->_private);
- }
-}
-
static php_sxe_object* php_sxe_object_new(zend_class_entry *ce, zend_function *fptr_count);
static xmlNodePtr php_sxe_reset_iterator(php_sxe_object *sxe, int use_data);
static xmlNodePtr php_sxe_iterator_fetch(php_sxe_object *sxe, xmlNodePtr node, int use_data);
@@ -452,7 +442,7 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value,
GET_NODE(sxe, node);
- php_sxe_invalidate_node_list_cache(node);
+ php_libxml_invalidate_node_list_cache_from_doc(node->doc);
if (sxe->iter.type == SXE_ITER_ATTRLIST) {
attribs = 1;
@@ -825,7 +815,7 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements
GET_NODE(sxe, node);
- php_sxe_invalidate_node_list_cache(node);
+ php_libxml_invalidate_node_list_cache_from_doc(node->doc);
if (Z_TYPE_P(member) == IS_LONG) {
if (sxe->iter.type != SXE_ITER_ATTRLIST) {
@@ -1700,7 +1690,7 @@ PHP_METHOD(SimpleXMLElement, addChild)
sxe = Z_SXEOBJ_P(ZEND_THIS);
GET_NODE(sxe, node);
- php_sxe_invalidate_node_list_cache(node);
+ php_libxml_invalidate_node_list_cache_from_doc(node->doc);
if (sxe->iter.type == SXE_ITER_ATTRLIST) {
php_error_docref(NULL, E_WARNING, "Cannot add element to attributes");
From 6ca41b72bd8860a5bdfac275868dd437449cb74b Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 31 May 2023 20:13:53 +0200
Subject: [PATCH 08/18] Add DOMDocument::xinclude() test for liveness
---
...etElementsByTagName_liveness_xinclude.phpt | 43 +++++++++++++++++++
1 file changed, 43 insertions(+)
create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt
diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt
new file mode 100644
index 0000000000000..2c14a2080569e
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt
@@ -0,0 +1,43 @@
+--TEST--
+DOMDocument::getElementsByTagName() liveness with DOMDocument::xinclude()
+--EXTENSIONS--
+dom
+--FILE--
+
+
+ Hello
+
+
+
+ xinclude: book.xml not found
+
+
+
+
+EOD;
+
+$dom = new DOMDocument;
+$dom->loadXML($xml);
+$elements = $dom->getElementsByTagName('p');
+var_dump($elements->item(0)->textContent);
+@$dom->xinclude();
+var_dump($elements->item(1)->textContent);
+echo $dom->saveXML();
+
+?>
+--EXPECT--
+string(5) "Hello"
+string(28) "xinclude: book.xml not found"
+
+
+ Hello
+
+
+ xinclude: book.xml not found
+
+
+
From 57215ac95d40e52ce502235d85549c5e63dc1fbf Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 31 May 2023 21:23:07 +0200
Subject: [PATCH 09/18] More invalidations
---
ext/dom/node.c | 2 +
ext/dom/processinginstruction.c | 2 +
...tsByTagName_liveness_write_properties.phpt | 43 +++++++++++++++++++
3 files changed, 47 insertions(+)
create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt
diff --git a/ext/dom/node.c b/ext/dom/node.c
index 8cb95fd463d7e..ffd7a87f9015e 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -195,6 +195,8 @@ int dom_node_node_value_write(dom_object *obj, zval *newval)
break;
}
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
+
zend_string_release_ex(str, 0);
return SUCCESS;
}
diff --git a/ext/dom/processinginstruction.c b/ext/dom/processinginstruction.c
index 465ecb431e73a..c40d24d18ce23 100644
--- a/ext/dom/processinginstruction.c
+++ b/ext/dom/processinginstruction.c
@@ -128,6 +128,8 @@ int dom_processinginstruction_data_write(dom_object *obj, zval *newval)
return FAILURE;
}
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
+
xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1);
zend_string_release_ex(str, 0);
diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt
new file mode 100644
index 0000000000000..af8af51844c9d
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt
@@ -0,0 +1,43 @@
+--TEST--
+DOMDocument::getElementsByTagName() liveness affected by writing properties
+--EXTENSIONS--
+dom
+--FILE--
+';
+$fields = ['nodeValue', 'textContent'];
+
+foreach ($fields as $field) {
+ $doc = new DOMDocument;
+ $doc->loadXML($xml);
+ $list = $doc->getElementsByTagName('a');
+ var_dump($list->item(0) === NULL);
+ $doc->documentElement->{$field} = 'new_content';
+ var_dump($list->item(0) === NULL);
+ print $doc->saveXML();
+}
+
+// Shouldn't be affected
+$doc = new DOMDocument;
+$doc->loadXML($xml);
+$list = $doc->getElementsByTagNameNS('foo', 'a');
+var_dump($list->item(0) === NULL);
+$doc->documentElement->firstChild->prefix = 'ns2';
+var_dump($list->item(0) === NULL);
+print $doc->saveXML();
+
+?>
+--EXPECT--
+bool(false)
+bool(true)
+
+new_content
+bool(false)
+bool(true)
+
+new_content
+bool(false)
+bool(false)
+
+
From 8e614b36e5846e63a2aba22c7798bba31c6fdca5 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 31 May 2023 22:46:26 +0200
Subject: [PATCH 10/18] Fix tree traversal and add some tests
---
UPGRADING.INTERNALS | 2 +
ext/dom/dom_iterators.c | 32 +++----
ext/dom/nodelist.c | 22 ++---
ext/dom/php_dom.c | 24 +++--
ext/dom/php_dom.h | 2 +-
...tElementsByTagName_liveness_tree_walk.phpt | 89 +++++++++++++++++++
...ocument_liveness_caching_invalidation.phpt | 33 +++++--
7 files changed, 165 insertions(+), 39 deletions(-)
create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt
diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index bf489c3258ed7..c16e0d69df31f 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -130,6 +130,8 @@ PHP 8.3 INTERNALS UPGRADE NOTES
. php_dom_is_cache_tag_stale_from_doc_ptr()
. php_dom_is_cache_tag_stale_from_node()
. php_dom_mark_cache_tag_up_to_date_from_node()
+ - The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate
+ the base node of the node list.
g. ext/libxml
- Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and
diff --git a/ext/dom/dom_iterators.c b/ext/dom/dom_iterators.c
index 3d1fe629c8cf8..2cf2c7bb6e7ce 100644
--- a/ext/dom/dom_iterators.c
+++ b/ext/dom/dom_iterators.c
@@ -213,23 +213,24 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
/* The collection is live, we nav the tree from the base object if we cannot
* use the cache to restart from the last point. */
basenode = dom_object_get_node(objmap->baseobj);
+ if (UNEXPECTED(!basenode)) {
+ goto err;
+ }
if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) {
+ php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode);
+ previndex = 0;
if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
- basenode->type == XML_HTML_DOCUMENT_NODE)) {
- basenode = xmlDocGetRootElement((xmlDoc *) basenode);
- } else if (basenode) {
- basenode = basenode->children;
+ basenode->type == XML_HTML_DOCUMENT_NODE)) {
+ curnode = xmlDocGetRootElement((xmlDoc *) basenode);
} else {
- goto err;
+ curnode = basenode->children;
}
- php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode);
- previndex = 0;
} else {
previndex = iter->index - 1;
- basenode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
+ curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
- basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
+ basenode, curnode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
}
}
} else {
@@ -266,7 +267,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
{
dom_object *intern;
dom_nnodemap_object *objmap;
- xmlNodePtr nodep, curnode=NULL;
+ xmlNodePtr curnode=NULL;
int curindex = 0;
HashTable *nodeht;
zval *entry;
@@ -297,24 +298,25 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
ZVAL_COPY(&iterator->curobj, entry);
}
} else {
- nodep = (xmlNode *)dom_object_get_node(objmap->baseobj);
- if (!nodep) {
+ xmlNodePtr basep = (xmlNode *)dom_object_get_node(objmap->baseobj);
+ if (!basep) {
goto err;
}
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
if (objmap->nodetype == XML_ATTRIBUTE_NODE) {
- curnode = (xmlNodePtr) nodep->properties;
+ curnode = (xmlNodePtr) basep->properties;
} else {
- curnode = (xmlNodePtr) nodep->children;
+ curnode = (xmlNodePtr) basep->children;
}
} else {
+ xmlNodePtr nodep = basep;
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
} else {
nodep = nodep->children;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
- nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
+ basep, nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
}
}
} else {
diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c
index 11cab96e41a1a..fa5bda977d52d 100644
--- a/ext/dom/nodelist.c
+++ b/ext/dom/nodelist.c
@@ -92,13 +92,14 @@ static int get_nodelist_length(dom_object *obj)
}
}
} else {
+ xmlNodePtr basep = nodep;
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
} else {
nodep = nodep->children;
}
dom_get_elements_by_tag_name_ns_raw(
- nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1);
+ basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1);
}
objmap->cached_length = count;
@@ -149,7 +150,7 @@ PHP_METHOD(DOMNodeList, item)
xmlNodePtr itemnode = NULL;
dom_nnodemap_object *objmap;
- xmlNodePtr nodep;
+ xmlNodePtr basep;
int count = 0;
id = ZEND_THIS;
@@ -177,8 +178,9 @@ PHP_METHOD(DOMNodeList, item)
return;
}
} else if (objmap->baseobj) {
- nodep = dom_object_get_node(objmap->baseobj);
- if (nodep) {
+ basep = dom_object_get_node(objmap->baseobj);
+ if (basep) {
+ xmlNodePtr nodep = basep;
/* For now we're only able to use cache for forward search.
* TODO: in the future we could extend the logic of the node list such that backwards searches
* are also possible. */
@@ -212,13 +214,13 @@ PHP_METHOD(DOMNodeList, item)
itemnode = nodep;
} else {
if (restart) {
- if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
- nodep = xmlDocGetRootElement((xmlDoc*) nodep);
+ if (basep->type == XML_DOCUMENT_NODE || basep->type == XML_HTML_DOCUMENT_NODE) {
+ nodep = xmlDocGetRootElement((xmlDoc*) basep);
} else {
- nodep = nodep->children;
+ nodep = basep->children;
}
}
- itemnode = dom_get_elements_by_tag_name_ns_raw(nodep, (char *) objmap->ns, (char *) objmap->local, &count, relative_index);
+ itemnode = dom_get_elements_by_tag_name_ns_raw(basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, relative_index);
}
cache_itemnode = true;
}
@@ -238,8 +240,8 @@ PHP_METHOD(DOMNodeList, item)
dom_object *cached_obj = Z_DOMOBJ_P(return_value);
GC_ADDREF(&cached_obj->std);
/* If the tag is stale, all cached data is useless. Otherwise only the cached object is useless. */
- if (php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
- php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, nodep);
+ if (php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, itemnode)) {
+ php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, itemnode);
reset_objmap_cache(objmap);
} else {
objmap_cache_release_cached_obj(objmap);
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 1fbf6bb3c0d9f..0d08c3f8e737c 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1227,7 +1227,7 @@ bool dom_has_feature(zend_string *feature, zend_string *version)
}
/* }}} end dom_has_feature */
-xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */
+xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */
{
xmlNodePtr ret = NULL;
bool local_match_any = local[0] == '*' && local[1] == '\0';
@@ -1248,12 +1248,26 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l
(*cur)++;
}
}
- ret = dom_get_elements_by_tag_name_ns_raw(nodep->children, ns, local, cur, index);
- if (ret != NULL) {
- break;
+
+ if (nodep->children) {
+ nodep = nodep->children;
+ continue;
}
}
- nodep = nodep->next;
+
+ if (nodep->next) {
+ nodep = nodep->next;
+ } else {
+ /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
+ do {
+ nodep = nodep->parent;
+ ZEND_ASSERT(nodep != NULL);
+ if (nodep == basep) {
+ return NULL;
+ }
+ } while (nodep->next == NULL);
+ nodep = nodep->next;
+ }
}
return ret;
}
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index fd9cfde059c67..bf0f8ed339428 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -118,7 +118,7 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns);
void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep);
xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName);
void dom_normalize (xmlNodePtr nodep);
-xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index);
+xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index);
void php_dom_create_implementation(zval *retval);
int dom_hierarchy(xmlNodePtr parent, xmlNodePtr child);
bool dom_has_feature(zend_string *feature, zend_string *version);
diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt
new file mode 100644
index 0000000000000..91d810df51bc6
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt
@@ -0,0 +1,89 @@
+--TEST--
+DOMDocument::getElementsByTagName() liveness tree walk
+--EXTENSIONS--
+dom
+--FILE--
+loadXML('');
+
+echo "-- On first child, for --\n";
+$list = $doc->documentElement->firstChild->getElementsByTagName('b');
+var_dump($list->length);
+for ($i = 0; $i < $list->length; $i++) {
+ echo $i, " ", $list->item($i)->getAttribute('i'), "\n";
+}
+// Try to access one beyond to check if we don't get excess elements
+var_dump($list->item($i));
+
+echo "-- On first child, foreach --\n";
+foreach ($list as $item) {
+ echo $item->getAttribute('i'), "\n";
+}
+
+echo "-- On document, for --\n";
+$list = $doc->getElementsByTagName('b');
+var_dump($list->length);
+for ($i = 0; $i < $list->length; $i++) {
+ echo $i, " ", $list->item($i)->getAttribute('i'), "\n";
+}
+// Try to access one beyond to check if we don't get excess elements
+var_dump($list->item($i));
+
+echo "-- On document, foreach --\n";
+foreach ($list as $item) {
+ echo $item->getAttribute('i'), "\n";
+}
+
+echo "-- On document, after caching followed by removing --\n";
+
+$list = $doc->documentElement->firstChild->getElementsByTagName('b');
+$list->item(0); // Activate item cache
+$list->item(0)->remove();
+$list->item(0)->remove();
+$list->item(0)->remove();
+var_dump($list->length);
+var_dump($list->item(0));
+foreach ($list as $item) {
+ echo "Should not execute\n";
+}
+
+echo "-- On document, clean list after removal --\n";
+$list = $doc->documentElement->firstChild->getElementsByTagName('b');
+var_dump($list->length);
+var_dump($list->item(0));
+foreach ($list as $item) {
+ echo "Should not execute\n";
+}
+
+?>
+--EXPECT--
+-- On first child, for --
+int(3)
+0 1
+1 2
+2 3
+NULL
+-- On first child, foreach --
+1
+2
+3
+-- On document, for --
+int(4)
+0 1
+1 2
+2 3
+3 4
+NULL
+-- On document, foreach --
+1
+2
+3
+4
+-- On document, after caching followed by removing --
+int(0)
+NULL
+-- On document, clean list after removal --
+int(0)
+NULL
diff --git a/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt
index eaa600a666c55..e05bd1ac6f646 100644
--- a/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt
+++ b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt
@@ -5,22 +5,39 @@ dom
--FILE--
createElement( 'root' );
-$doc->appendChild( $root );
-for ( $i = 0; $i < 5; $i++ ) {
- $root->appendChild( $doc->createElement( 'e' ) );
-}
+$doc->loadXML('');
+$root = $doc->documentElement;
$i = 0;
-foreach ( $doc->getElementsByTagName( 'e' ) as $node ) {
+echo "-- Overwrite during iteration --\n";
+
+foreach ($doc->getElementsByTagName('e') as $node) {
if ($i++ == 2) {
$root->textContent = 'overwrite';
}
- var_dump($node->tagName);
+ var_dump($node->tagName, $node->getAttribute('id'));
+}
+
+echo "-- Empty iteration --\n";
+foreach ($doc->getElementsByTagName('e') as $node) {
+ echo "Should not execute\n";
+}
+
+echo "-- After adding an element again --\n";
+$root->appendChild(new DOMElement('e'));
+foreach ($doc->getElementsByTagName('e') as $node) {
+ echo "Should execute once\n";
}
?>
---EXPECTF--
+--EXPECT--
+-- Overwrite during iteration --
string(1) "e"
+string(1) "1"
string(1) "e"
+string(1) "2"
string(1) "e"
+string(1) "3"
+-- Empty iteration --
+-- After adding an element again --
+Should execute once
From 652c73af9b54141aae7dc76684d1fc0680ed124e Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 31 May 2023 22:47:44 +0200
Subject: [PATCH 11/18] Fixup extensions section
---
.../DOMDocument_getElementsByTagName_liveness_simplexml.phpt | 1 +
1 file changed, 1 insertion(+)
diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt
index d1eda9a2271f3..0ac52cd5d662f 100644
--- a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt
+++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt
@@ -2,6 +2,7 @@
DOMDocument::getElementsByTagName() liveness with simplexml_import_dom
--EXTENSIONS--
dom
+simplexml
--FILE--
Date: Thu, 1 Jun 2023 20:23:05 +0200
Subject: [PATCH 12/18] nodep == NULL is impossible here
---
ext/dom/php_dom.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 0d08c3f8e737c..6433c9755fde6 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1237,7 +1237,7 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep,
* This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". */
bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0');
- while (nodep != NULL && (*cur <= index || index == -1)) {
+ while (*cur <= index || index == -1) {
if (nodep->type == XML_ELEMENT_NODE) {
if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) {
if (ns_match_any || (ns[0] == '\0' && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
From 7ea0e020df01fad1bb9d1ba6b4ecacb4bb49e054 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 1 Jun 2023 20:28:59 +0200
Subject: [PATCH 13/18] Get rid of index == -1 condition
---
UPGRADING.INTERNALS | 2 +-
ext/dom/nodelist.c | 2 +-
ext/dom/php_dom.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index c16e0d69df31f..664c56d832500 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -131,7 +131,7 @@ PHP 8.3 INTERNALS UPGRADE NOTES
. php_dom_is_cache_tag_stale_from_node()
. php_dom_mark_cache_tag_up_to_date_from_node()
- The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate
- the base node of the node list.
+ the base node of the node list. This function also no longer accepts -1 as the index argument.
g. ext/libxml
- Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and
diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c
index fa5bda977d52d..20e3b18bee883 100644
--- a/ext/dom/nodelist.c
+++ b/ext/dom/nodelist.c
@@ -99,7 +99,7 @@ static int get_nodelist_length(dom_object *obj)
nodep = nodep->children;
}
dom_get_elements_by_tag_name_ns_raw(
- basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1);
+ basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, INT_MAX - 1 /* because of <= */);
}
objmap->cached_length = count;
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 6433c9755fde6..feaec825087c7 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1237,7 +1237,7 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep,
* This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". */
bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0');
- while (*cur <= index || index == -1) {
+ while (*cur <= index) {
if (nodep->type == XML_ELEMENT_NODE) {
if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) {
if (ns_match_any || (ns[0] == '\0' && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
From c33dcd8b1f9cfdddbdfa12f0b1d9ab1bf4877e5c Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 1 Jun 2023 21:12:41 +0200
Subject: [PATCH 14/18] Improve test
---
.../DOMElement_getElementsByTagName_without_document.phpt | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt
index cf6acf46162a3..9aebf3139cdf9 100644
--- a/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt
+++ b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt
@@ -6,7 +6,9 @@ dom
getElementsByTagName('b') as $x);
+foreach ($element->getElementsByTagName("b") as $x) {
+ var_dump($x);
+}
?>
Done
From 85da7e9c99884176bfd49e92eff6187b94b9137c Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 1 Jun 2023 21:13:13 +0200
Subject: [PATCH 15/18] Add NULL check for detached document case
---
ext/dom/php_dom.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index feaec825087c7..f268744c46b6c 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1229,6 +1229,11 @@ bool dom_has_feature(zend_string *feature, zend_string *version)
xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */
{
+ /* Can happen with detached document */
+ if (UNEXPECTED(nodep == NULL)) {
+ return NULL;
+ }
+
xmlNodePtr ret = NULL;
bool local_match_any = local[0] == '*' && local[1] == '\0';
From 39bee8b9127ebe12deec47fa4328a15d684a8865 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 1 Jun 2023 21:28:13 +0200
Subject: [PATCH 16/18] Avoid allocations and rely on interned strings if
possible
---
UPGRADING.INTERNALS | 2 ++
ext/dom/document.c | 9 ++-------
ext/dom/documenttype.c | 4 ++--
ext/dom/element.c | 9 ++-------
ext/dom/node.c | 4 ++--
ext/dom/php_dom.c | 34 +++++++++++++++++++++++++++++-----
ext/dom/php_dom.h | 4 +++-
7 files changed, 42 insertions(+), 24 deletions(-)
diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index 664c56d832500..6db2d99ec59b9 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -132,6 +132,8 @@ PHP 8.3 INTERNALS UPGRADE NOTES
. php_dom_mark_cache_tag_up_to_date_from_node()
- The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate
the base node of the node list. This function also no longer accepts -1 as the index argument.
+ - The function dom_namednode_iter() has additional arguments to avoid recomputing the length of
+ the strings.
g. ext/libxml
- Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 956bffa700927..06c4b2b97b9e3 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -777,7 +777,6 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
size_t name_len;
dom_object *intern, *namednode;
char *name;
- xmlChar *local;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
@@ -788,8 +787,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
- local = xmlCharStrndup(name, name_len);
- dom_namednode_iter(intern, 0, namednode, NULL, local, NULL);
+ dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0);
}
/* }}} end dom_document_get_elements_by_tag_name */
@@ -993,7 +991,6 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
size_t uri_len, name_len;
dom_object *intern, *namednode;
char *uri, *name;
- xmlChar *local, *nsuri;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) {
@@ -1004,9 +1001,7 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
- local = xmlCharStrndup(name, name_len);
- nsuri = xmlCharStrndup(uri ? uri : "", uri_len);
- dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri);
+ dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len);
}
/* }}} end dom_document_get_elements_by_tag_name_ns */
diff --git a/ext/dom/documenttype.c b/ext/dom/documenttype.c
index b046b05f80eff..cfc4b043edb22 100644
--- a/ext/dom/documenttype.c
+++ b/ext/dom/documenttype.c
@@ -65,7 +65,7 @@ int dom_documenttype_entities_read(dom_object *obj, zval *retval)
entityht = (xmlHashTable *) doctypep->entities;
intern = Z_DOMOBJ_P(retval);
- dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, NULL);
+ dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, 0, NULL, 0);
return SUCCESS;
}
@@ -93,7 +93,7 @@ int dom_documenttype_notations_read(dom_object *obj, zval *retval)
notationht = (xmlHashTable *) doctypep->notations;
intern = Z_DOMOBJ_P(retval);
- dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, NULL);
+ dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, 0, NULL, 0);
return SUCCESS;
}
diff --git a/ext/dom/element.c b/ext/dom/element.c
index 19cef5834657a..93d9ad5fb910a 100644
--- a/ext/dom/element.c
+++ b/ext/dom/element.c
@@ -511,7 +511,6 @@ PHP_METHOD(DOMElement, getElementsByTagName)
size_t name_len;
dom_object *intern, *namednode;
char *name;
- xmlChar *local;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
@@ -522,8 +521,7 @@ PHP_METHOD(DOMElement, getElementsByTagName)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
- local = xmlCharStrndup(name, name_len);
- dom_namednode_iter(intern, 0, namednode, NULL, local, NULL);
+ dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0);
}
/* }}} end dom_element_get_elements_by_tag_name */
@@ -930,7 +928,6 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS)
size_t uri_len, name_len;
dom_object *intern, *namednode;
char *uri, *name;
- xmlChar *local, *nsuri;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) {
@@ -941,9 +938,7 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
- local = xmlCharStrndup(name, name_len);
- nsuri = xmlCharStrndup(uri ? uri : "", uri_len);
- dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri);
+ dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len);
}
/* }}} end dom_element_get_elements_by_tag_name_ns */
diff --git a/ext/dom/node.c b/ext/dom/node.c
index ffd7a87f9015e..78c9b2dca1802 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -276,7 +276,7 @@ int dom_node_child_nodes_read(dom_object *obj, zval *retval)
php_dom_create_iterator(retval, DOM_NODELIST);
intern = Z_DOMOBJ_P(retval);
- dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, NULL);
+ dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, 0, NULL, 0);
return SUCCESS;
}
@@ -484,7 +484,7 @@ int dom_node_attributes_read(dom_object *obj, zval *retval)
if (nodep->type == XML_ELEMENT_NODE) {
php_dom_create_iterator(retval, DOM_NAMEDNODEMAP);
intern = Z_DOMOBJ_P(retval);
- dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, NULL);
+ dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, 0, NULL, 0);
} else {
ZVAL_NULL(retval);
}
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index f268744c46b6c..85d8b9c975b11 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -942,7 +942,7 @@ void dom_objects_free_storage(zend_object *object)
}
/* }}} */
-void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns) /* {{{ */
+void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len) /* {{{ */
{
dom_nnodemap_object *mapptr = (dom_nnodemap_object *) intern->ptr;
@@ -950,11 +950,33 @@ void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xml
ZVAL_OBJ_COPY(&mapptr->baseobj_zv, &basenode->std);
+ xmlDocPtr doc = basenode->document ? basenode->document->ptr : NULL;
+
mapptr->baseobj = basenode;
mapptr->nodetype = ntype;
mapptr->ht = ht;
- mapptr->local = local;
- mapptr->ns = ns;
+
+ const xmlChar* tmp;
+
+ if (local) {
+ int len = local_len > INT_MAX ? -1 : (int) local_len;
+ if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)local, len)) != NULL) {
+ mapptr->local = (xmlChar*) tmp;
+ } else {
+ mapptr->local = xmlCharStrndup(local, len);
+ mapptr->free_local = true;
+ }
+ }
+
+ if (ns) {
+ int len = ns_len > INT_MAX ? -1 : (int) ns_len;
+ if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)ns, len)) != NULL) {
+ mapptr->ns = (xmlChar*) tmp;
+ } else {
+ mapptr->ns = xmlCharStrndup(ns, len);
+ mapptr->free_ns = true;
+ }
+ }
}
/* }}} */
@@ -1013,10 +1035,10 @@ void dom_nnodemap_objects_free_storage(zend_object *object) /* {{{ */
if (objmap->cached_obj && GC_DELREF(&objmap->cached_obj->std) == 0) {
zend_objects_store_del(&objmap->cached_obj->std);
}
- if (objmap->local) {
+ if (objmap->free_local) {
xmlFree(objmap->local);
}
- if (objmap->ns) {
+ if (objmap->free_ns) {
xmlFree(objmap->ns);
}
if (!Z_ISUNDEF(objmap->baseobj_zv)) {
@@ -1045,7 +1067,9 @@ zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type) /* {{{ */
objmap->nodetype = 0;
objmap->ht = NULL;
objmap->local = NULL;
+ objmap->free_local = false;
objmap->ns = NULL;
+ objmap->free_ns = false;
objmap->cache_tag.modification_nr = 0;
objmap->cached_length = -1;
objmap->cached_obj = NULL;
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index bf0f8ed339428..0602f4166eaa7 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -89,6 +89,8 @@ typedef struct _dom_nnodemap_object {
php_libxml_cache_tag cache_tag;
dom_object *cached_obj;
int cached_obj_index;
+ bool free_local : 1;
+ bool free_ns : 1;
} dom_nnodemap_object;
typedef struct {
@@ -125,7 +127,7 @@ bool dom_has_feature(zend_string *feature, zend_string *version);
int dom_node_is_read_only(xmlNodePtr node);
int dom_node_children_valid(xmlNodePtr node);
void php_dom_create_iterator(zval *return_value, int ce_type);
-void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns);
+void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len);
xmlNodePtr create_notation(const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID);
xmlNode *php_dom_libxml_hash_iter(xmlHashTable *ht, int index);
xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index);
From efb4f459d2d624088fde50e72fe65caa107b8da2 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 2 Jun 2023 17:33:38 +0200
Subject: [PATCH 17/18] Throw an exception instead
---
ext/dom/php_dom.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 85d8b9c975b11..44c10ea6a8aec 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1290,10 +1290,14 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep,
/* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
do {
nodep = nodep->parent;
- ZEND_ASSERT(nodep != NULL);
if (nodep == basep) {
return NULL;
}
+ /* This shouldn't happen, unless there's an invalidation bug somewhere. */
+ if (UNEXPECTED(nodep == NULL)) {
+ zend_throw_error(NULL, "Current node in traversal is not in the document. Please report this as a bug in php-src.");
+ return NULL;
+ }
} while (nodep->next == NULL);
nodep = nodep->next;
}
From 8cba4555186f07074dc19b12a6909e64213f9365 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 3 Jun 2023 00:12:45 +0200
Subject: [PATCH 18/18] [ci skip] NEWS
---
NEWS | 3 +++
1 file changed, 3 insertions(+)
diff --git a/NEWS b/NEWS
index 53e4fe73519b2..808f3ed806101 100644
--- a/NEWS
+++ b/NEWS
@@ -44,6 +44,9 @@ PHP NEWS
- Date:
. Implement More Appropriate Date/Time Exceptions RFC. (Derick)
+- DOM:
+ . Fix bug GH-11308 (getElementsByTagName() is O(N^2)). (nielsdos)
+
- Exif:
. Removed unneeded codepaths in exif_process_TIFF_in_JPEG(). (nielsdos)