From 929ec00894330d4508250d41d9e124e945435758 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 1 Oct 2023 14:26:24 +0200 Subject: [PATCH 1/2] Add test with wrong output --- ...TagName_liveness_deallocated_document.phpt | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt diff --git a/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt b/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt new file mode 100644 index 000000000000..8a7aaa35a08e --- /dev/null +++ b/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt @@ -0,0 +1,31 @@ +--TEST-- +getElementsByTagName() liveness with deallocated document +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + +

1

2

3

+
+XML); + +$ps = $dom->documentElement->getElementsByTagName('p'); +$second = $ps->item(1); +var_dump($second->textContent); +var_dump($ps->length); + +unset($dom); +$dom = $second->ownerDocument; + +$second->parentNode->appendChild($dom->createElement('p', '4')); +var_dump($ps->length); + +?> +--EXPECT-- +string(1) "2" +int(3) +int(3) From 7fa961381d97b26d8e4576fc18b9041a1078b6ab Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 1 Oct 2023 14:27:49 +0200 Subject: [PATCH 2/2] Fix broken cache invalidation with deallocated and reallocated document node The original caching implementation had an oversight in combination with the new lifetime management in DOM for 8.3. The modification counter is stored on the document object itself, but as that can get deallocated when all references disappear, stale cache data can be used. Normally this isn't a problem, unless getElementsByTagName is called not on the document but on a child node. Fix it by moving caching data into the ref object, which will outlive all nodes from a document even if the document object disappears. --- ext/dom/document.c | 15 +++++------ ext/dom/node.c | 14 +++++----- ext/dom/parentnode.c | 14 +++++----- ext/dom/php_dom.h | 21 +++++++++++---- ...TagName_liveness_deallocated_document.phpt | 2 +- ext/libxml/libxml.c | 9 ++----- ext/libxml/php_libxml.h | 26 ++++++++++--------- 7 files changed, 54 insertions(+), 47 deletions(-) diff --git a/ext/dom/document.c b/ext/dom/document.c index 234b71be1d87..29bc8b8c1564 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -818,7 +818,7 @@ PHP_METHOD(DOMDocument, importNode) } } - php_libxml_invalidate_node_list_cache_from_doc(docp); + php_libxml_invalidate_node_list_cache(intern->document); DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern); } @@ -1031,7 +1031,7 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x { php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); if (nodep->doc != new_document) { - php_libxml_invalidate_node_list_cache_from_doc(new_document); + php_libxml_invalidate_node_list_cache(dom_object_new_document->document); /* Note for ATTRIBUTE_NODE: specified is always true in ext/dom, * and since this unlink it; the owner element will be unset (i.e. parentNode). */ @@ -1101,7 +1101,7 @@ PHP_METHOD(DOMDocument, normalizeDocument) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); - php_libxml_invalidate_node_list_cache_from_doc(docp); + php_libxml_invalidate_node_list_cache(intern->document); dom_normalize((xmlNodePtr) docp); } @@ -1327,7 +1327,7 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt xmlDocPtr docp = (xmlDocPtr) dom_object_get_node(intern); dom_doc_propsptr doc_prop = NULL; if (docp != NULL) { - const php_libxml_doc_ptr *doc_ptr = docp->_private; + const php_libxml_ref_obj *doc_ptr = intern->document; ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ old_modification_nr = doc_ptr->cache_tag.modification_nr; php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); @@ -1348,9 +1348,8 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); /* Since iterators should invalidate, we need to start the modification number from the old counter */ if (old_modification_nr != 0) { - php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */ - doc_ptr->cache_tag.modification_nr = old_modification_nr; - php_libxml_invalidate_node_list_cache(doc_ptr); + intern->document->cache_tag.modification_nr = old_modification_nr; + php_libxml_invalidate_node_list_cache(intern->document); } RETURN_TRUE; @@ -1611,7 +1610,7 @@ PHP_METHOD(DOMDocument, xinclude) php_dom_remove_xinclude_nodes(root); } - php_libxml_invalidate_node_list_cache_from_doc(docp); + php_libxml_invalidate_node_list_cache(intern->document); if (err) { RETVAL_LONG(err); diff --git a/ext/dom/node.c b/ext/dom/node.c index 627f24cb3395..e2b4977d8927 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -201,7 +201,7 @@ int dom_node_node_value_write(dom_object *obj, zval *newval) break; } - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(obj->document); zend_string_release_ex(str, 0); return SUCCESS; @@ -794,7 +794,7 @@ int dom_node_text_content_write(dom_object *obj, zval *newval) return FAILURE; } - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(obj->document); /* Typed property, this is already a string */ ZEND_ASSERT(Z_TYPE_P(newval) == IS_STRING); @@ -919,7 +919,7 @@ PHP_METHOD(DOMNode, insertBefore) php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL); } - php_libxml_invalidate_node_list_cache_from_doc(parentp->doc); + php_libxml_invalidate_node_list_cache(intern->document); if (ref != NULL) { DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj); @@ -1124,7 +1124,7 @@ PHP_METHOD(DOMNode, replaceChild) nodep->doc->intSubset = (xmlDtd *) newchild; } } - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(intern->document); DOM_RET_OBJ(oldchild, &ret, intern); } /* }}} end dom_node_replace_child */ @@ -1166,7 +1166,7 @@ PHP_METHOD(DOMNode, removeChild) } xmlUnlinkNode(child); - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(intern->document); DOM_RET_OBJ(child, &ret, intern); } /* }}} end dom_node_remove_child */ @@ -1271,7 +1271,7 @@ PHP_METHOD(DOMNode, appendChild) dom_reconcile_ns(nodep->doc, new_child); } - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(intern->document); DOM_RET_OBJ(new_child, &ret, intern); return; @@ -1387,7 +1387,7 @@ PHP_METHOD(DOMNode, normalize) DOM_GET_OBJ(nodep, id, xmlNodePtr, intern); - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(intern->document); dom_normalize(nodep); diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 8b225d75c92c..1c8e1887f649 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -309,7 +309,7 @@ void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc) return; } - php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + php_libxml_invalidate_node_list_cache(context->document); xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -353,7 +353,7 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc) return; } - php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + php_libxml_invalidate_node_list_cache(context->document); xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -404,7 +404,7 @@ void dom_parent_node_after(dom_object *context, zval *nodes, uint32_t nodesc) doc = prevsib->doc; - php_libxml_invalidate_node_list_cache_from_doc(doc); + php_libxml_invalidate_node_list_cache(context->document); /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -456,7 +456,7 @@ void dom_parent_node_before(dom_object *context, zval *nodes, uint32_t nodesc) doc = nextsib->doc; - php_libxml_invalidate_node_list_cache_from_doc(doc); + php_libxml_invalidate_node_list_cache(context->document); /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -523,7 +523,7 @@ void dom_child_node_remove(dom_object *context) return; } - php_libxml_invalidate_node_list_cache_from_doc(child->doc); + php_libxml_invalidate_node_list_cache(context->document); xmlUnlinkNode(child); } @@ -557,7 +557,7 @@ void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc) } xmlDocPtr doc = parentNode->doc; - php_libxml_invalidate_node_list_cache_from_doc(doc); + php_libxml_invalidate_node_list_cache(context->document); /* Spec step 4: convert nodes into fragment */ xmlNodePtr fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -602,7 +602,7 @@ void dom_parent_node_replace_children(dom_object *context, zval *nodes, uint32_t return; } - php_libxml_invalidate_node_list_cache_from_doc(thisp->doc); + php_libxml_invalidate_node_list_cache(context->document); dom_remove_all_children(thisp); diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 4d415256788a..46900e8e5fe9 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -200,7 +200,7 @@ int php_dom_get_nodelist_length(dom_object *obj); #define DOM_NODELIST 0 #define DOM_NAMEDNODEMAP 1 -static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr) +static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_ref_obj *doc_ptr) { ZEND_ASSERT(cache_tag != NULL); ZEND_ASSERT(doc_ptr != NULL); @@ -215,15 +215,26 @@ static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node) { ZEND_ASSERT(node != NULL); - return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private); + php_libxml_node_ptr *private = node->_private; + if (!private) { + return true; + } + php_libxml_node_object *object_private = private->_private; + if (!object_private || !object_private->document) { + return true; + } + return php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, object_private->document); } static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node) { ZEND_ASSERT(cache_tag != NULL); - if (node->doc && node->doc->_private) { - const php_libxml_doc_ptr* doc_ptr = node->doc->_private; - cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr; + php_libxml_node_ptr *private = node->_private; + if (private) { + php_libxml_node_object *object_private = private->_private; + if (object_private->document) { + cache_tag->modification_nr = object_private->document->cache_tag.modification_nr; + } } } diff --git a/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt b/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt index 8a7aaa35a08e..d926747399b9 100644 --- a/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt +++ b/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt @@ -28,4 +28,4 @@ var_dump($ps->length); --EXPECT-- string(1) "2" int(3) -int(3) +int(4) diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 48b981a95a5c..db3f696ce038 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -1293,13 +1293,7 @@ PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object, object->node->_private = private_data; } } else { - if (UNEXPECTED(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)) { - php_libxml_doc_ptr *doc_ptr = emalloc(sizeof(php_libxml_doc_ptr)); - doc_ptr->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */ - object->node = (php_libxml_node_ptr *) doc_ptr; /* downcast */ - } else { - object->node = emalloc(sizeof(php_libxml_node_ptr)); - } + object->node = emalloc(sizeof(php_libxml_node_ptr)); ret_refcount = 1; object->node->node = node; object->node->refcount = 1; @@ -1344,6 +1338,7 @@ PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object, object->document->ptr = docp; object->document->refcount = ret_refcount; object->document->doc_props = NULL; + object->document->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */ } return ret_refcount; diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index 78a5df8f97e6..b325f17adc94 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -57,10 +57,15 @@ typedef struct _libxml_doc_props { bool recover; } libxml_doc_props; +typedef struct { + size_t modification_nr; +} php_libxml_cache_tag; + typedef struct _php_libxml_ref_obj { void *ptr; int refcount; libxml_doc_props *doc_props; + php_libxml_cache_tag cache_tag; } php_libxml_ref_obj; typedef struct _php_libxml_node_ptr { @@ -69,16 +74,6 @@ typedef struct _php_libxml_node_ptr { void *_private; } php_libxml_node_ptr; -typedef struct { - size_t modification_nr; -} php_libxml_cache_tag; - -/* extends php_libxml_node_ptr */ -typedef struct { - php_libxml_node_ptr node_ptr; - php_libxml_cache_tag cache_tag; -} php_libxml_doc_ptr; - typedef struct _php_libxml_node_object { php_libxml_node_ptr *node; php_libxml_ref_obj *document; @@ -91,8 +86,11 @@ static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object * return (php_libxml_node_object *)((char*)(obj) - obj->handlers->offset); } -static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_doc_ptr *doc_ptr) +static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_ref_obj *doc_ptr) { + if (!doc_ptr) { + return; + } #if SIZEOF_SIZE_T == 8 /* If one operation happens every nanosecond, then it would still require 584 years to overflow * the counter. So we'll just assume this never happens. */ @@ -108,7 +106,11 @@ static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_ static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp) { if (docp && docp->_private) { /* docp is NULL for detached nodes */ - php_libxml_invalidate_node_list_cache((php_libxml_doc_ptr *)docp->_private); + php_libxml_node_ptr *private = docp->_private; + php_libxml_node_object *object_private = private->_private; + if (object_private) { + php_libxml_invalidate_node_list_cache(object_private->document); + } } }