From e9d60237b680d70aa2a1182ff440795099ef0f9f Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Sat, 17 Jun 2023 00:37:48 +0200 Subject: [PATCH 1/2] Add regression test with wrong output --- ext/dom/tests/gh11404.phpt | 160 +++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 ext/dom/tests/gh11404.phpt diff --git a/ext/dom/tests/gh11404.phpt b/ext/dom/tests/gh11404.phpt new file mode 100644 index 0000000000000..6eafb772533f2 --- /dev/null +++ b/ext/dom/tests/gh11404.phpt @@ -0,0 +1,160 @@ +--TEST-- +GH-11404: DOMDocument::savexml and friends ommit xmlns="" declaration for null namespace, creating incorrect xml representation of the DOM +--EXTENSIONS-- +dom +--FILE-- +createElement('a'); + $nodeB = $dom->createElementNS(null, 'b'); + $nodeC = $dom->createElementNS('', 'c'); + $nodeD = $dom->createElement('d'); + $nodeD->setAttributeNS('some:ns', 'x:attrib', 'val'); + $nodeE = $dom->createElementNS('some:ns', 'e'); + // And these two respect the default ns. + $nodeE->setAttributeNS(null, 'attrib1', 'val'); + $nodeE->setAttributeNS('', 'attrib2', 'val'); + + $dom->documentElement->appendChild($nodeA); + $dom->documentElement->appendChild($nodeB); + $dom->documentElement->appendChild($nodeC); + $dom->documentElement->appendChild($nodeD); + $dom->documentElement->appendChild($nodeE); + + var_dump($nodeA->namespaceURI); + var_dump($nodeB->namespaceURI); + var_dump($nodeC->namespaceURI); + var_dump($nodeD->namespaceURI); + var_dump($nodeE->namespaceURI); + + echo $dom->saveXML(); + + // Create a subtree without using a fragment + $subtree = $dom->createElement('subtree'); + $subtree->appendChild($dom->createElementNS('some:ns', 'subtreechild1')); + $subtree->firstElementChild->appendChild($dom->createElement('subtreechild2')); + $dom->documentElement->appendChild($subtree); + + echo $dom->saveXML(); + + // Create a subtree with the use of a fragment + $subtree = $dom->createDocumentFragment(); + $subtree->appendChild($child3 = $dom->createElement('child3')); + $child3->appendChild($dom->createElement('child4')); + $subtree->appendChild($dom->createElement('child5')); + $dom->documentElement->appendChild($subtree); + + echo $dom->saveXML(); +} + +$dom1 = new DOMDocument; +$dom1->loadXML(''); +testAppendAndAttributes($dom1); + +echo "-- Test append and attributes: without default namespace variation --\n"; + +$dom1 = new DOMDocument; +$dom1->loadXML(''); +testAppendAndAttributes($dom1); + +echo "-- Test import --\n"; + +function testImport(?string $href, string $toBeImported) { + $dom1 = new DOMDocument; + $decl = $href === NULL ? '' : "xmlns=\"$href\""; + $dom1->loadXML(''); + + $dom2 = new DOMDocument; + $dom2->loadXML('' . $toBeImported); + + $dom1->documentElement->append( + $imported = $dom1->importNode($dom2->documentElement, true) + ); + + var_dump($imported->namespaceURI); + + echo $dom1->saveXML(); +} + +testImport(null, ''); +testImport('', ''); +testImport('some:ns', ''); +testImport('', '
'); +testImport('some:ns', '
'); + +echo "-- Namespace URI comparison --\n"; + +$dom1 = new DOMDocument; +$dom1->loadXML('
'); +var_dump($dom1->firstElementChild->namespaceURI); +var_dump($dom1->firstElementChild->firstElementChild->namespaceURI); + +$dom1 = new DOMDocument; +$dom1->appendChild($dom1->createElementNS('a:b', 'parent')); +$dom1->firstElementChild->appendChild($dom1->createElementNS('a:b', 'child1')); +$dom1->firstElementChild->appendChild($second = $dom1->createElement('child2')); +var_dump($dom1->firstElementChild->namespaceURI); +var_dump($dom1->firstElementChild->firstElementChild->namespaceURI); +var_dump($second->namespaceURI); +echo $dom1->saveXML(); + +$dom1 = new DOMDocument; +$dom1->loadXML(''); +var_dump($dom1->firstElementChild->namespaceURI); +$dom1->firstElementChild->appendChild($dom1->createElementNS('a:b', 'tag')); +var_dump($dom1->firstElementChild->firstElementChild->namespaceURI); +?> +--EXPECT-- +-- Test append and attributes: with default namespace variation -- +NULL +NULL +string(0) "" +NULL +string(7) "some:ns" + + + + + + +-- Test append and attributes: without default namespace variation -- +NULL +NULL +string(0) "" +NULL +string(7) "some:ns" + + + + + + +-- Test import -- +NULL + + +NULL + + +NULL + + +NULL + +
+string(7) "some:ns" + +
+-- Namespace URI comparison -- +string(3) "a:b" +string(3) "a:b" +string(3) "a:b" +string(3) "a:b" +NULL + + +string(3) "a:b" +string(3) "a:b" From 76153319387029d85845ef24560bc7ddcaa6ae71 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Sat, 17 Jun 2023 00:39:45 +0200 Subject: [PATCH 2/2] Fix GH-11404: DOMDocument::savexml and friends ommit xmlns="" declaration for null namespace, creating incorrect xml representation of the DOM The NULL namespace is only correct when there is no default namespace override. When there is, we need to manually set it to the empty string namespace. --- ext/dom/document.c | 4 ++++ ext/dom/element.c | 4 ++++ ext/dom/node.c | 16 +++++++-------- ext/dom/php_dom.c | 39 +++++++++++++++++++++++++++++++++++++ ext/dom/tests/bug47530.phpt | 2 +- ext/dom/tests/gh11404.phpt | 20 +++++++++---------- 6 files changed, 65 insertions(+), 20 deletions(-) diff --git a/ext/dom/document.c b/ext/dom/document.c index 93091df83a04f..199c39b5ff503 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -878,6 +878,10 @@ PHP_METHOD(DOMDocument, createElementNS) if (errorcode == 0) { if (xmlValidateName((xmlChar *) localname, 0) == 0) { + /* https://dom.spec.whatwg.org/#validate-and-extract: demands us to set an empty string uri to NULL */ + if (uri_len == 0) { + uri = NULL; + } nodep = xmlNewDocNode(docp, NULL, (xmlChar *) localname, (xmlChar *) value); if (nodep != NULL && uri != NULL) { nsptr = xmlSearchNsByHref(nodep->doc, nodep, (xmlChar *) uri); diff --git a/ext/dom/element.c b/ext/dom/element.c index 44c576a07363f..79786ed907808 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -56,6 +56,10 @@ PHP_METHOD(DOMElement, __construct) if (uri_len > 0) { errorcode = dom_check_qname(name, &localname, &prefix, uri_len, name_len); if (errorcode == 0) { + /* https://dom.spec.whatwg.org/#validate-and-extract: demands us to set an empty string uri to NULL */ + if (uri_len == 0) { + uri = NULL; + } nodep = xmlNewNode (NULL, (xmlChar *)localname); if (nodep != NULL && uri != NULL) { nsptr = dom_get_ns(nodep, uri, &errorcode, prefix); diff --git a/ext/dom/node.c b/ext/dom/node.c index bcf4ee487d38d..dc8c96b85ff7f 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -531,7 +531,6 @@ Since: DOM Level 2 int dom_node_namespace_uri_read(dom_object *obj, zval *retval) { xmlNode *nodep = dom_object_get_node(obj); - char *str = NULL; if (nodep == NULL) { php_dom_throw_error(INVALID_STATE_ERR, 1); @@ -543,20 +542,19 @@ int dom_node_namespace_uri_read(dom_object *obj, zval *retval) case XML_ATTRIBUTE_NODE: case XML_NAMESPACE_DECL: if (nodep->ns != NULL) { - str = (char *) nodep->ns->href; + char *str = (char *) nodep->ns->href; + /* https://dom.spec.whatwg.org/#concept-attribute: namespaceUri is "null or a non-empty string" */ + if (str != NULL && str[0] != '\0') { + ZVAL_STRING(retval, str); + return SUCCESS; + } } break; default: - str = NULL; break; } - if (str != NULL) { - ZVAL_STRING(retval, str); - } else { - ZVAL_NULL(retval); - } - + ZVAL_NULL(retval); return SUCCESS; } diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 4a6ab2fee9e98..1703328c1d262 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1478,6 +1478,23 @@ static void dom_libxml_reconcile_ensure_namespaces_are_declared(xmlNodePtr nodep xmlDOMWrapReconcileNamespaces(&dummy_ctxt, nodep, /* options */ 0); } +static bool dom_must_replace_namespace_by_empty_default(xmlDocPtr doc, xmlNodePtr nodep) +{ + xmlNsPtr default_ns = xmlSearchNs(doc, nodep->parent, NULL); + return default_ns != NULL && default_ns->href != NULL && default_ns->href[0] != '\0'; +} + +static void dom_replace_namespace_by_empty_default(xmlDocPtr doc, xmlNodePtr nodep) +{ + if (nodep->ns == NULL) { + /* The node uses the default empty namespace, but the current default namespace is non-empty. + * We can't unconditionally do this because otherwise libxml2 creates an xmlns="" declaration. + * Note: there's no point searching the oldNs list, because we haven't found it in the tree anyway. + * Ideally this would be pre-allocated but unfortunately libxml2 doesn't offer such a functionality. */ + xmlSetNs(nodep, xmlNewNs(nodep, (const xmlChar *) "", NULL)); + } +} + void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ { /* Although the node type will be checked by the libxml2 API, @@ -1485,6 +1502,10 @@ void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ if (nodep->type == XML_ELEMENT_NODE) { dom_reconcile_ns_internal(doc, nodep, nodep->parent); dom_libxml_reconcile_ensure_namespaces_are_declared(nodep); + /* Check nodep->ns first to avoid an expensive lookup. */ + if (nodep->ns == NULL && dom_must_replace_namespace_by_empty_default(doc, nodep)) { + dom_replace_namespace_by_empty_default(doc, nodep); + } } } /* }}} */ @@ -1508,12 +1529,30 @@ static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlN void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last) { + bool did_compute_must_replace_namespace_by_empty_default = false; + bool must_replace_namespace_by_empty_default = false; + dom_reconcile_ns_list_internal(doc, nodep, last, nodep->parent); + /* The loop is outside of the recursion in the above call because * dom_libxml_reconcile_ensure_namespaces_are_declared() performs its own recursion. */ while (true) { /* The internal libxml2 call will already check the node type, no need for us to do it here. */ dom_libxml_reconcile_ensure_namespaces_are_declared(nodep); + + /* We don't have to handle the children, because if their ns's are NULL they'll just take on the default + * which should've been reconciled before. */ + if (nodep->ns == NULL) { + /* This is an optimistic approach: we assume that most of the time we don't need the result of the computation. */ + if (!did_compute_must_replace_namespace_by_empty_default) { + did_compute_must_replace_namespace_by_empty_default = true; + must_replace_namespace_by_empty_default = dom_must_replace_namespace_by_empty_default(doc, nodep); + } + if (must_replace_namespace_by_empty_default) { + dom_replace_namespace_by_empty_default(doc, nodep); + } + } + if (nodep == last) { break; } diff --git a/ext/dom/tests/bug47530.phpt b/ext/dom/tests/bug47530.phpt index 0fb990e0e7bff..ab0b030c94be9 100644 --- a/ext/dom/tests/bug47530.phpt +++ b/ext/dom/tests/bug47530.phpt @@ -121,7 +121,7 @@ test_appendChild_with_shadowing(); -- Test document fragment without import -- - + string(7) "foo:bar" string(19) "https://php.net/bar" -- Test document import -- diff --git a/ext/dom/tests/gh11404.phpt b/ext/dom/tests/gh11404.phpt index 6eafb772533f2..e2a9955874bbf 100644 --- a/ext/dom/tests/gh11404.phpt +++ b/ext/dom/tests/gh11404.phpt @@ -111,27 +111,27 @@ var_dump($dom1->firstElementChild->firstElementChild->namespaceURI); -- Test append and attributes: with default namespace variation -- NULL NULL -string(0) "" +NULL NULL string(7) "some:ns" - + - + - + -- Test append and attributes: without default namespace variation -- NULL NULL -string(0) "" +NULL NULL string(7) "some:ns" - + - + - + -- Test import -- NULL @@ -141,7 +141,7 @@ NULL NULL - + NULL
@@ -155,6 +155,6 @@ string(3) "a:b" string(3) "a:b" NULL - + string(3) "a:b" string(3) "a:b"