From c68c8c431bd1d6722756531ffe8633a1b63ac38c Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Wed, 14 Jun 2023 21:49:31 +0200 Subject: [PATCH] Fix bug #55294 and #47530 and #47847: namespace reconciliation issues We'll use the DOM wrapper version of libxml2 instead of the regular one. It's conforming to the behaviour we expect of DOM. Most of this patch is tests. I based and extended the tests on the code attached with the aforementioned bug reports. Therefore the credits for the tests: Co-authored-by: hilse at web dot de Co-authored-by: robin2008 at altruists dot org Co-authored-by: sgunderson at bigfoot dot com We'll also change the searching point of the internal reconciliation to start at the top of the added tree to avoid redundant work now that the function is changed. --- ext/dom/php_dom.c | 33 +++++--- ext/dom/tests/bug47530.phpt | 152 ++++++++++++++++++++++++++++++++++++ ext/dom/tests/bug47847.phpt | 27 +++++++ ext/dom/tests/bug55294.phpt | 29 +++++++ 4 files changed, 231 insertions(+), 10 deletions(-) create mode 100644 ext/dom/tests/bug47530.phpt create mode 100644 ext/dom/tests/bug47847.phpt create mode 100644 ext/dom/tests/bug55294.phpt diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 454dc54d8e211..4a6ab2fee9e98 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1441,7 +1441,7 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { } /* }}} end dom_set_old_ns */ -static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep) +static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr search_parent) { xmlNsPtr nsptr, nsdftptr, curns, prevns = NULL; @@ -1451,7 +1451,7 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep) while (curns) { nsdftptr = curns->next; if (curns->href != NULL) { - if((nsptr = xmlSearchNsByHref(doc, nodep->parent, curns->href)) && + if((nsptr = xmlSearchNsByHref(doc, search_parent, curns->href)) && (curns->prefix == NULL || 
xmlStrEqual(nsptr->prefix, curns->prefix))) { curns->next = NULL; if (prevns == NULL) { @@ -1469,23 +1469,34 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep) } } +static void dom_libxml_reconcile_ensure_namespaces_are_declared(xmlNodePtr nodep) +{ + /* Put on stack to avoid allocation. + * Although libxml2 currently does not use this for the reconciliation, it still + * makes sense to do this just in case libxml2's internals change in the future. */ + xmlDOMWrapCtxt dummy_ctxt = {0}; + xmlDOMWrapReconcileNamespaces(&dummy_ctxt, nodep, /* options */ 0); +} + void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ { + /* Although the node type will be checked by the libxml2 API, + * we still want to do the internal reconciliation conditionally. */ if (nodep->type == XML_ELEMENT_NODE) { - dom_reconcile_ns_internal(doc, nodep); - xmlReconciliateNs(doc, nodep); + dom_reconcile_ns_internal(doc, nodep, nodep->parent); + dom_libxml_reconcile_ensure_namespaces_are_declared(nodep); } } /* }}} */ -static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last) +static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last, xmlNodePtr search_parent) { ZEND_ASSERT(nodep != NULL); while (true) { if (nodep->type == XML_ELEMENT_NODE) { - dom_reconcile_ns_internal(doc, nodep); + dom_reconcile_ns_internal(doc, nodep, search_parent); if (nodep->children) { - dom_reconcile_ns_list_internal(doc, nodep->children, nodep->last /* process the whole children list */); + dom_reconcile_ns_list_internal(doc, nodep->children, nodep->last /* process the whole children list */, search_parent); } } if (nodep == last) { @@ -1497,10 +1508,12 @@ static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlN void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last) { - dom_reconcile_ns_list_internal(doc, nodep, last); - /* Outside of the recursion above because 
xmlReconciliateNs() performs its own recursion. */ + dom_reconcile_ns_list_internal(doc, nodep, last, nodep->parent); + /* The loop is outside of the recursion in the above call because + * dom_libxml_reconcile_ensure_namespaces_are_declared() performs its own recursion. */ while (true) { - xmlReconciliateNs(doc, nodep); + /* The internal libxml2 call will already check the node type, no need for us to do it here. */ + dom_libxml_reconcile_ensure_namespaces_are_declared(nodep); if (nodep == last) { break; } diff --git a/ext/dom/tests/bug47530.phpt b/ext/dom/tests/bug47530.phpt new file mode 100644 index 0000000000000..0fb990e0e7bff --- /dev/null +++ b/ext/dom/tests/bug47530.phpt @@ -0,0 +1,152 @@ +--TEST-- +Bug #47530 (Importing objects into document fragments creates bogus "default" namespace) +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + $root = $doc->documentElement; + $frag = $doc->createDocumentFragment(); + $frag->appendChild($doc->importNode($root->firstChild)); + $root->appendChild($frag); + echo $doc->saveXML(); +} + +function test_document_fragment_without_import() { + $doc = new DOMDocument; + $doc->loadXML(''); + $frag = $doc->createDocumentFragment(); + $frag->appendChild($doc->createElementNS('https://php.net/bar', 'bar')); + $frag->appendChild($doc->createElementNS('', 'bar')); + $element = $doc->documentElement->firstChild; + $element->appendChild($frag); + unset($frag); // Free fragment, should not break getting the namespaceURI below + echo $doc->saveXML(); + unset($doc); + var_dump($element->firstChild->tagName); + var_dump($element->firstChild->namespaceURI); +} + +function test_document_import() { + $xml = << + +
+

Test-Text

+
+
+XML; + + $dom = new DOMDocument(); + $dom->loadXML($xml); + + $dom2 = new DOMDocument(); + $importedNode = $dom2->importNode($dom->documentElement, true); + $dom2->appendChild($importedNode); + + echo $dom2->saveXML(); +} + +function test_partial_document_import() { + $xml = << + +
+

Test-Text

+ More test text + Even more test text +
+
+XML; + + $dom = new DOMDocument(); + $dom->loadXML($xml); + + $dom2 = new DOMDocument(); + $dom2->loadXML(''); + $importedNode = $dom2->importNode($dom->documentElement, true); + $dom2->documentElement->appendChild($importedNode); + + // Freeing the original document shouldn't break the other document + unset($importedNode); + unset($dom); + + echo $dom2->saveXML(); +} + +function test_document_import_with_attributes() { + $dom = new DOMDocument(); + $dom->loadXML('

'); + $dom2 = new DOMDocument(); + $dom2->loadXML('
'); + $dom2->documentElement->appendChild($dom2->importNode($dom->documentElement->firstChild)); + echo $dom2->saveXML(), "\n"; + + $dom2->documentElement->firstChild->appendChild($dom2->importNode($dom->documentElement->firstChild->nextSibling)); + echo $dom2->saveXML(), "\n"; +} + +function test_appendChild_with_shadowing() { + $dom = new DOMDocument(); + $dom->loadXML(''); + + $a = $dom->documentElement->firstElementChild; + $b = $a->nextSibling; + $b->remove(); + $a->appendChild($b); + + echo $dom->saveXML(), "\n"; +} + +echo "-- Test document fragment with import --\n"; +test_document_fragment_with_import(); +echo "-- Test document fragment without import --\n"; +test_document_fragment_without_import(); +echo "-- Test document import --\n"; +test_document_import(); +echo "-- Test partial document import --\n"; +test_partial_document_import(); +echo "-- Test document import with attributes --\n"; +test_document_import_with_attributes(); +echo "-- Test appendChild with shadowing --\n"; +test_appendChild_with_shadowing(); + +?> +--EXPECT-- +-- Test document fragment with import -- + + +-- Test document fragment without import -- + + +string(7) "foo:bar" +string(19) "https://php.net/bar" +-- Test document import -- + + +
+

Test-Text

+
+
+-- Test partial document import -- + + +
+

Test-Text

+ More test text + Even more test text +
+
+-- Test document import with attributes -- + +

+ + +

+ +-- Test appendChild with shadowing -- + +
diff --git a/ext/dom/tests/bug47847.phpt b/ext/dom/tests/bug47847.phpt new file mode 100644 index 0000000000000..324bf9508d5ce --- /dev/null +++ b/ext/dom/tests/bug47847.phpt @@ -0,0 +1,27 @@ +--TEST-- +Bug #47847 (importNode loses the namespace of an XML element) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + + + + + +XML); + +$aDOM = new DOMDocument(); +$imported = $aDOM->importNode($fromdom->documentElement->firstElementChild, true); +$aDOM->appendChild($imported); + +echo $aDOM->saveXML(); +?> +--EXPECT-- + + + + diff --git a/ext/dom/tests/bug55294.phpt b/ext/dom/tests/bug55294.phpt new file mode 100644 index 0000000000000..19534955029bc --- /dev/null +++ b/ext/dom/tests/bug55294.phpt @@ -0,0 +1,29 @@ +--TEST-- +Bug #55294 (DOMDocument::importNode shifts namespaces when "default" namespace exists) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + + + + +EOXML +); + +$bDOM = new DOMDocument(); +$node = $bDOM->importNode($aDOM->getElementsByTagNameNS('http://example.com/A', 'B')->item(0), true); +$bDOM->appendChild($node); + +echo $bDOM->saveXML(), "\n"; + +?> +--EXPECT-- + + + +