From c535df6e8bd1b85a04fc026837b71592bd6af69d Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 6 Aug 2023 14:40:54 +0200 Subject: [PATCH 1/3] Fix #80927: Removing documentElement after creating attribute node: possible use-after-free --- UPGRADING.INTERNALS | 2 + ext/dom/element.c | 2 +- ext/dom/php_dom.c | 31 +------------ ext/dom/php_dom.h | 1 - ext/dom/tests/bug80927.phpt | 87 +++++++++++++++++++++++++++++++++++++ ext/libxml/libxml.c | 52 +++++++++++++++++++++- ext/libxml/php_libxml.h | 1 + 7 files changed, 143 insertions(+), 33 deletions(-) create mode 100644 ext/dom/tests/bug80927.phpt diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 4a3254b2353c3..153bc8067062c 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -150,6 +150,8 @@ PHP 8.3 INTERNALS UPGRADE NOTES dom_parent_node_before() now use an uint32_t argument for the number of nodes instead of int. - There is now a helper function php_dom_get_content_into_zval() to get the contents of a node. This avoids allocation if possible. + - The function dom_set_old_ns() has been moved into ext/libxml as php_libxml_set_old_ns() and + is now publicly exposed as an API. g. ext/libxml - Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and diff --git a/ext/dom/element.c b/ext/dom/element.c index 8d734c12c1c2a..d3178db782e14 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -1497,7 +1497,7 @@ PHP_METHOD(DOMElement, toggleAttribute) } ns->next = NULL; - dom_set_old_ns(thisp->doc, ns); + php_libxml_set_old_ns(thisp->doc, ns); dom_reconcile_ns(thisp->doc, thisp); } else { /* TODO: in the future when namespace bugs are fixed, diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 5a066db8d708c..9e036214bde53 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1436,35 +1436,6 @@ void dom_normalize (xmlNodePtr nodep) } /* }}} end dom_normalize */ - -/* {{{ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) */ -void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { - if (doc == NULL) - return; - - ZEND_ASSERT(ns->next == NULL); - - /* Note: we'll use a prepend strategy instead of append to - * make sure we don't lose performance when the list is long. - * As libxml2 could assume the xml node is the first one, we'll place our - * new entries after the first one. */ - - if (doc->oldNs == NULL) { - doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); - if (doc->oldNs == NULL) { - return; - } - memset(doc->oldNs, 0, sizeof(xmlNs)); - doc->oldNs->type = XML_LOCAL_NAMESPACE; - doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE); - doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml"); - } else { - ns->next = doc->oldNs->next; - } - doc->oldNs->next = ns; -} -/* }}} end dom_set_old_ns */ - static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr search_parent) { xmlNsPtr nsptr, nsdftptr, curns, prevns = NULL; @@ -1486,7 +1457,7 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePt /* Note: we can't get here if the ns is already on the oldNs list. * This is because in that case the definition won't be on the node, and * therefore won't be in the nodep->nsDef list. */ - dom_set_old_ns(doc, curns); + php_libxml_set_old_ns(doc, curns); curns = prevns; } } diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index f0a2d598625c3..b4a6a2d01e83b 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -128,7 +128,6 @@ void php_dom_throw_error_with_message(int error_code, char *error_message, int s void node_list_unlink(xmlNodePtr node); int dom_check_qname(char *qname, char **localname, char **prefix, int uri_len, int name_len); xmlNsPtr dom_get_ns(xmlNodePtr node, char *uri, int *errorcode, char *prefix); -void dom_set_old_ns(xmlDoc *doc, xmlNs *ns); void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep); void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last); xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName); diff --git a/ext/dom/tests/bug80927.phpt b/ext/dom/tests/bug80927.phpt new file mode 100644 index 0000000000000..e68ccbe847d38 --- /dev/null +++ b/ext/dom/tests/bug80927.phpt @@ -0,0 +1,87 @@ +--TEST-- +Bug #80927 (Removing documentElement after creating attribute node: possible use-after-free) +--EXTENSIONS-- +dom +--FILE-- +appendChild($dom->createElement("html")); + $a = $dom->createAttributeNS("fake_ns", "test:test"); + $dom->removeChild($dom->documentElement); + + echo $dom->saveXML(); + + var_dump($a->namespaceURI); + var_dump($a->prefix); +} + +function test2($variation) { + $dom = new DOMDocument(); + $dom->appendChild($dom->createElement("html")); + $a = $dom->createAttributeNS("fake_ns", "test:test"); + + $foo = $dom->appendChild($dom->createElement('foo')); + $foo->appendChild($dom->documentElement); + + unset($foo); + + if ($variation === 1) { + $dom->documentElement->remove(); + } else if ($variation === 2) { + $dom->documentElement->firstElementChild->remove(); + } else { + assert(false); + } + + echo $dom->saveXML(); + + var_dump($a->namespaceURI); + var_dump($a->prefix); +} + +function test3() { + $dom = new DOMDocument(); + $dom->appendChild($dom->createElement('html')); + $foobar = $dom->documentElement->appendChild($dom->createElementNS('some:ns', 'foo:bar')); + $foobar2 = $foobar->appendChild($dom->createElementNS('some:ns', 'foo:bar2')); + $foobar->remove(); + unset($foobar); + $dom->documentElement->appendChild($foobar2); + + echo $dom->saveXML(); + + var_dump($foobar2->namespaceURI); + var_dump($foobar2->prefix); +} + +echo "--- Remove namespace declarator for attribute, root ---\n"; +test1(); +echo "--- Remove namespace declarator for attribute, moved root ---\n"; +test2(1); +echo "--- Remove namespace declarator for attribute, moved root child ---\n"; +test2(2); +echo "--- Remove namespace declarator for element ---\n"; +test3(); + +?> +--EXPECT-- +--- Remove namespace declarator for attribute, root --- + +string(7) "fake_ns" +string(4) "test" +--- Remove namespace declarator for attribute, moved root --- + +string(7) "fake_ns" +string(4) "test" +--- Remove namespace declarator for attribute, moved root child --- + + +string(7) "fake_ns" +string(4) "test" +--- Remove namespace declarator for element --- + + +string(7) "some:ns" +string(3) "foo" diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index a8de9bd6d65e1..e2d5d2520083d 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -103,6 +103,39 @@ zend_module_entry libxml_module_entry = { /* }}} */ +static void php_libxml_set_old_ns_list(xmlDocPtr doc, xmlNsPtr first, xmlNsPtr last) +{ + if (UNEXPECTED(doc == NULL)) { + return; + } + + ZEND_ASSERT(last->next == NULL); + + /* Note: we'll use a prepend strategy instead of append to + * make sure we don't lose performance when the list is long. + * As libxml2 could assume the xml node is the first one, we'll place our + * new entries after the first one. */ + + if (UNEXPECTED(doc->oldNs == NULL)) { + doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); + if (doc->oldNs == NULL) { + return; + } + memset(doc->oldNs, 0, sizeof(xmlNs)); + doc->oldNs->type = XML_LOCAL_NAMESPACE; + doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE); + doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml"); + } else { + last->next = doc->oldNs->next; + } + doc->oldNs->next = first; +} + +PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns) +{ + php_libxml_set_old_ns_list(doc, ns, ns); +} + static void php_libxml_unlink_entity(void *data, void *table, const xmlChar *name) { xmlEntityPtr entity = data; @@ -213,8 +246,25 @@ static void php_libxml_node_free(xmlNodePtr node) xmlHashScan(dtd->pentities, php_libxml_unlink_entity, dtd->pentities); /* No unlinking of notations, see remark above at case XML_NOTATION_NODE. */ } - ZEND_FALLTHROUGH; + xmlFreeNode(node); + break; } + case XML_ELEMENT_NODE: + if (node->nsDef && node->doc) { + /* Make the namespace definition survive the destruction of the holding element. + * Strictly speaking, this isn't necessary if we know the definition isn't used + * or if the subtree does not contain userland references to it. + * But almost all namespace definitions are used somewhere, because otherwise they + * (usually) wouldn't exist in the first place. */ + xmlNsPtr ns = node->nsDef; + xmlNsPtr last = ns; + while (last->next) { + last = last->next; + } + php_libxml_set_old_ns_list(node->doc, ns, last); + node->nsDef = NULL; + } + ZEND_FALLTHROUGH; default: xmlFreeNode(node); break; diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index 3bd5202f58790..78a5df8f97e66 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -134,6 +134,7 @@ PHP_LIBXML_API int php_libxml_xmlCheckUTF8(const unsigned char *s); PHP_LIBXML_API void php_libxml_switch_context(zval *context, zval *oldcontext); PHP_LIBXML_API void php_libxml_issue_error(int level, const char *msg); PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable); +PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns); /* Init/shutdown functions*/ PHP_LIBXML_API void php_libxml_initialize(void); From 228e112ab27930720e8f10b1ae12602fcd6e8ee2 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 8 Aug 2023 21:03:57 +0200 Subject: [PATCH 2/3] [ci skip] update comment --- ext/libxml/libxml.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index e2d5d2520083d..37776d141c5ae 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -251,11 +251,26 @@ static void php_libxml_node_free(xmlNodePtr node) } case XML_ELEMENT_NODE: if (node->nsDef && node->doc) { - /* Make the namespace definition survive the destruction of the holding element. - * Strictly speaking, this isn't necessary if we know the definition isn't used - * or if the subtree does not contain userland references to it. - * But almost all namespace definitions are used somewhere, because otherwise they - * (usually) wouldn't exist in the first place. */ + /* Make the namespace declaration survive the destruction of the holding element. + * This prevents a use-after-free on the namespace declaration. + * + * The main problem is that libxml2 doesn't have a reference count on the namespace declaration. + * We don't actually need to save the namespace declaration if we know the subtree it belongs to + * has no references from userland. However, we can't know that without traversing the whole subtree + * (=> slow), or without adding some subtree metadata (=> also slow). + * So we have to assume we need to save everything. + * + * However, namespace declarations are quite rare in comparison to other node types. + * Most node types are either elements, text or attributes. + * And you only need one namespace declaration per namespace (in principle). + * So I expect the number of namespace declarations to be low for an average XML document. + * + * In the worst possible case we have to save all namespace declarations when we for example remove + * the whole document. But given the above reasoning this likely won't be a lot of declarations even + * in the worst case. + * A single declaration only takes about 48 bytes of memory, and I don't expect the worst case to occur + * very often (why would you remove the whole document?). + */ xmlNsPtr ns = node->nsDef; xmlNsPtr last = ns; while (last->next) { From ae07c556d0cabe6b800fe8747abfa7477b952d13 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 12 Aug 2023 17:01:42 +0200 Subject: [PATCH 3/3] Address review comments --- ext/dom/tests/bug80927.phpt | 22 ++++++++++++---------- ext/libxml/libxml.c | 3 ++- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/ext/dom/tests/bug80927.phpt b/ext/dom/tests/bug80927.phpt index e68ccbe847d38..5c12e8fb867d9 100644 --- a/ext/dom/tests/bug80927.phpt +++ b/ext/dom/tests/bug80927.phpt @@ -17,7 +17,12 @@ function test1() { var_dump($a->prefix); } -function test2($variation) { +enum Test2Variation { + case REMOVE_DOCUMENT; + case REMOVE_CHILD; +} + +function test2(Test2Variation $variation) { $dom = new DOMDocument(); $dom->appendChild($dom->createElement("html")); $a = $dom->createAttributeNS("fake_ns", "test:test"); @@ -27,13 +32,10 @@ function test2($variation) { unset($foo); - if ($variation === 1) { - $dom->documentElement->remove(); - } else if ($variation === 2) { - $dom->documentElement->firstElementChild->remove(); - } else { - assert(false); - } + match ($variation) { + Test2Variation::REMOVE_DOCUMENT => $dom->documentElement->remove(), + Test2Variation::REMOVE_CHILD => $dom->documentElement->firstElementChild->remove(), + }; echo $dom->saveXML(); @@ -59,9 +61,9 @@ function test3() { echo "--- Remove namespace declarator for attribute, root ---\n"; test1(); echo "--- Remove namespace declarator for attribute, moved root ---\n"; -test2(1); +test2(Test2Variation::REMOVE_DOCUMENT); echo "--- Remove namespace declarator for attribute, moved root child ---\n"; -test2(2); +test2(Test2Variation::REMOVE_CHILD); echo "--- Remove namespace declarator for element ---\n"; test3(); diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 37776d141c5ae..06ff262b09c06 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -279,7 +279,8 @@ static void php_libxml_node_free(xmlNodePtr node) php_libxml_set_old_ns_list(node->doc, ns, last); node->nsDef = NULL; } - ZEND_FALLTHROUGH; + xmlFreeNode(node); + break; default: xmlFreeNode(node); break;