Skip to content

Fix GH-11404: DOMDocument::savexml and friends omit xmlns="" declara… #11428

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ext/dom/document.c
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,10 @@ PHP_METHOD(DOMDocument, createElementNS)

if (errorcode == 0) {
if (xmlValidateName((xmlChar *) localname, 0) == 0) {
/* https://dom.spec.whatwg.org/#validate-and-extract requires an empty-string namespace URI to be treated as NULL. */
if (uri_len == 0) {
uri = NULL;
}
nodep = xmlNewDocNode(docp, NULL, (xmlChar *) localname, (xmlChar *) value);
if (nodep != NULL && uri != NULL) {
nsptr = xmlSearchNsByHref(nodep->doc, nodep, (xmlChar *) uri);
Expand Down
4 changes: 4 additions & 0 deletions ext/dom/element.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ PHP_METHOD(DOMElement, __construct)
if (uri_len > 0) {
errorcode = dom_check_qname(name, &localname, &prefix, uri_len, name_len);
if (errorcode == 0) {
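/* https://dom.spec.whatwg.org/#validate-and-extract requires an empty-string namespace URI to be treated as NULL.
 * NOTE(review): this code is nested inside the `if (uri_len > 0)` branch above, so the check below can never be true here. */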
if (uri_len == 0) {
uri = NULL;
}
nodep = xmlNewNode (NULL, (xmlChar *)localname);
if (nodep != NULL && uri != NULL) {
nsptr = dom_get_ns(nodep, uri, &errorcode, prefix);
Expand Down
16 changes: 7 additions & 9 deletions ext/dom/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,6 @@ Since: DOM Level 2
int dom_node_namespace_uri_read(dom_object *obj, zval *retval)
{
xmlNode *nodep = dom_object_get_node(obj);
char *str = NULL;

if (nodep == NULL) {
php_dom_throw_error(INVALID_STATE_ERR, 1);
Expand All @@ -543,20 +542,19 @@ int dom_node_namespace_uri_read(dom_object *obj, zval *retval)
case XML_ATTRIBUTE_NODE:
case XML_NAMESPACE_DECL:
if (nodep->ns != NULL) {
str = (char *) nodep->ns->href;
char *str = (char *) nodep->ns->href;
/* https://dom.spec.whatwg.org/#concept-attribute: namespaceUri is "null or a non-empty string" */
if (str != NULL && str[0] != '\0') {
ZVAL_STRING(retval, str);
return SUCCESS;
}
}
break;
default:
str = NULL;
break;
}

if (str != NULL) {
ZVAL_STRING(retval, str);
} else {
ZVAL_NULL(retval);
}

ZVAL_NULL(retval);
return SUCCESS;
}

Expand Down
39 changes: 39 additions & 0 deletions ext/dom/php_dom.c
Original file line number Diff line number Diff line change
Expand Up @@ -1478,13 +1478,34 @@ static void dom_libxml_reconcile_ensure_namespaces_are_declared(xmlNodePtr nodep
xmlDOMWrapReconcileNamespaces(&dummy_ctxt, nodep, /* options */ 0);
}

/* Looks up the default (prefix-less) namespace visible from nodep's parent and
 * reports whether it exists with a non-empty href. */
static bool dom_must_replace_namespace_by_empty_default(xmlDocPtr doc, xmlNodePtr nodep)
{
	xmlNsPtr inherited_default = xmlSearchNs(doc, nodep->parent, NULL);

	/* No default namespace found, or one without an href: nothing to override. */
	if (inherited_default == NULL || inherited_default->href == NULL) {
		return false;
	}

	return inherited_default->href[0] != '\0';
}

/* Gives a namespace-less node an explicit empty default namespace.
 * Nodes that already have a namespace are left untouched. */
static void dom_replace_namespace_by_empty_default(xmlDocPtr doc, xmlNodePtr nodep)
{
	if (nodep->ns != NULL) {
		return;
	}

	/* The node lives in the empty namespace while the default namespace in scope is non-empty.
	 * This must stay conditional: doing it for every node would make libxml2 emit an
	 * xmlns="" declaration each time.
	 * Searching the oldNs list is pointless, since the namespace was not found in the tree.
	 * Pre-allocating the namespace would be nicer, but libxml2 offers no way to do so. */
	xmlSetNs(nodep, xmlNewNs(nodep, (const xmlChar *) "", NULL));
}

void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */
{
	/* libxml2 checks the node type on its own, but the internal
	 * reconciliation should only ever run for element nodes. */
	if (nodep->type != XML_ELEMENT_NODE) {
		return;
	}

	dom_reconcile_ns_internal(doc, nodep, nodep->parent);
	dom_libxml_reconcile_ensure_namespaces_are_declared(nodep);

	/* Look at nodep->ns before anything else so the expensive
	 * default-namespace lookup is skipped for namespaced nodes. */
	if (nodep->ns != NULL) {
		return;
	}

	if (dom_must_replace_namespace_by_empty_default(doc, nodep)) {
		dom_replace_namespace_by_empty_default(doc, nodep);
	}
}
/* }}} */
Expand All @@ -1508,12 +1529,30 @@ static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlN

void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last)
{
bool did_compute_must_replace_namespace_by_empty_default = false;
bool must_replace_namespace_by_empty_default = false;

dom_reconcile_ns_list_internal(doc, nodep, last, nodep->parent);

/* The loop is outside of the recursion in the above call because
* dom_libxml_reconcile_ensure_namespaces_are_declared() performs its own recursion. */
while (true) {
/* The internal libxml2 call will already check the node type, no need for us to do it here. */
dom_libxml_reconcile_ensure_namespaces_are_declared(nodep);

/* We don't have to handle the children, because if their ns's are NULL they'll just take on the default
* which should've been reconciled before. */
if (nodep->ns == NULL) {
/* This is an optimistic approach: we assume that most of the time we don't need the result of the computation. */
if (!did_compute_must_replace_namespace_by_empty_default) {
did_compute_must_replace_namespace_by_empty_default = true;
must_replace_namespace_by_empty_default = dom_must_replace_namespace_by_empty_default(doc, nodep);
}
if (must_replace_namespace_by_empty_default) {
dom_replace_namespace_by_empty_default(doc, nodep);
}
}

if (nodep == last) {
break;
}
Expand Down
2 changes: 1 addition & 1 deletion ext/dom/tests/bug47530.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ test_appendChild_with_shadowing();
<html xmlns="https://php.net/something" xmlns:ns="https://php.net/whatever"><element ns:foo="https://php.net/bar"/></html>
-- Test document fragment without import --
<?xml version="1.0"?>
<html xmlns=""><element xmlns:foo="https://php.net/bar"><foo:bar/><bar xmlns=""/></element></html>
<html xmlns=""><element xmlns:foo="https://php.net/bar"><foo:bar/><bar/></element></html>
string(7) "foo:bar"
string(19) "https://php.net/bar"
-- Test document import --
Expand Down
160 changes: 160 additions & 0 deletions ext/dom/tests/gh11404.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
--TEST--
GH-11404: DOMDocument::savexml and friends omit xmlns="" declaration for null namespace, creating incorrect xml representation of the DOM
--EXTENSIONS--
dom
--FILE--
<?php

echo "-- Test append and attributes: with default namespace variation --\n";

/**
 * Appends elements created with and without a namespace to the document
 * element of $dom, dumps the namespaceURI of each of them, and prints the
 * serialized document after each of the three build stages (plain appends,
 * a hand-built subtree, a subtree built through a document fragment).
 *
 * @param DOMDocument $dom document whose documentElement receives the new nodes
 */
function testAppendAndAttributes($dom) {
// Elements a-d are created without a namespace, using different APIs (createElement, null URI, empty-string URI).
$nodeA = $dom->createElement('a');
$nodeB = $dom->createElementNS(null, 'b');
$nodeC = $dom->createElementNS('', 'c');
$nodeD = $dom->createElement('d');
// d itself has no namespace, but carries an attribute in some:ns bound to prefix x.
$nodeD->setAttributeNS('some:ns', 'x:attrib', 'val');
// e is the only element created in the some:ns namespace.
$nodeE = $dom->createElementNS('some:ns', 'e');
// Attributes set with a null or empty namespace; the expected output serializes both without a prefix.
$nodeE->setAttributeNS(null, 'attrib1', 'val');
$nodeE->setAttributeNS('', 'attrib2', 'val');

// Stage 1: attach every node directly under the document element.
$dom->documentElement->appendChild($nodeA);
$dom->documentElement->appendChild($nodeB);
$dom->documentElement->appendChild($nodeC);
$dom->documentElement->appendChild($nodeD);
$dom->documentElement->appendChild($nodeE);

// The namespaceURI is read after the nodes have been attached to the tree.
var_dump($nodeA->namespaceURI);
var_dump($nodeB->namespaceURI);
var_dump($nodeC->namespaceURI);
var_dump($nodeD->namespaceURI);
var_dump($nodeE->namespaceURI);

echo $dom->saveXML();

// Create a subtree without using a fragment
// Stage 2: namespace-less -> some:ns -> namespace-less nesting, assembled before it is attached.
$subtree = $dom->createElement('subtree');
$subtree->appendChild($dom->createElementNS('some:ns', 'subtreechild1'));
$subtree->firstElementChild->appendChild($dom->createElement('subtreechild2'));
$dom->documentElement->appendChild($subtree);

echo $dom->saveXML();

// Create a subtree with the use of a fragment
// Stage 3: the fragment holds two top-level children (child3 with nested child4, and child5).
$subtree = $dom->createDocumentFragment();
$subtree->appendChild($child3 = $dom->createElement('child3'));
$child3->appendChild($dom->createElement('child4'));
$subtree->appendChild($dom->createElement('child5'));
$dom->documentElement->appendChild($subtree);

echo $dom->saveXML();
}

// Variation 1: the root element declares some:ns as its default namespace.
$dom1 = new DOMDocument;
$dom1->loadXML('<?xml version="1.0" ?><with xmlns="some:ns" />');
testAppendAndAttributes($dom1);

echo "-- Test append and attributes: without default namespace variation --\n";

// Variation 2: the root element has no namespace declaration at all.
$dom1 = new DOMDocument;
$dom1->loadXML('<?xml version="1.0" ?><with/>');
testAppendAndAttributes($dom1);

echo "-- Test import --\n";

/**
 * Imports the root element of one document into another and appends it to the
 * target's document element, then dumps the imported node's namespaceURI and
 * prints the serialized target document.
 *
 * @param string|null $href         default namespace declared on the target root; null omits the declaration
 * @param string      $toBeImported XML markup (without prolog) of the document whose root is imported
 */
function testImport(?string $href, string $toBeImported) {
    // Build the optional xmlns declaration for the target root element.
    $decl = '';
    if ($href !== NULL) {
        $decl = "xmlns=\"$href\"";
    }

    $target = new DOMDocument;
    $target->loadXML('<?xml version="1.0" ?><with ' . $decl . '/>');

    $source = new DOMDocument;
    $source->loadXML('<?xml version="1.0" ?>' . $toBeImported);

    // Deep-import the source root into the target document, then attach it.
    $imported = $target->importNode($source->documentElement, true);
    $target->documentElement->append($imported);

    var_dump($imported->namespaceURI);

    echo $target->saveXML();
}

// Import scenarios: target root without declaration, with xmlns="", and with xmlns="some:ns";
// the last two also import a nested element that switches the default namespace.
testImport(null, '<none/>');
testImport('', '<none/>');
testImport('some:ns', '<none/>');
testImport('', '<none><div xmlns="some:ns"/></none>');
testImport('some:ns', '<none xmlns="some:ns"><div xmlns=""/></none>');

echo "-- Namespace URI comparison --\n";

// Parsed document: <div> has no declaration of its own and sits under a root declaring xmlns="a:b".
$dom1 = new DOMDocument;
$dom1->loadXML('<?xml version="1.0"?><test xmlns="a:b"><div/></test>');
var_dump($dom1->firstElementChild->namespaceURI);
var_dump($dom1->firstElementChild->firstElementChild->namespaceURI);

// Programmatically built document: parent and child1 are created in a:b, child2 is created without a namespace.
$dom1 = new DOMDocument;
$dom1->appendChild($dom1->createElementNS('a:b', 'parent'));
$dom1->firstElementChild->appendChild($dom1->createElementNS('a:b', 'child1'));
$dom1->firstElementChild->appendChild($second = $dom1->createElement('child2'));
var_dump($dom1->firstElementChild->namespaceURI);
var_dump($dom1->firstElementChild->firstElementChild->namespaceURI);
var_dump($second->namespaceURI);
echo $dom1->saveXML();

// Mixed: parsed root with xmlns="a:b", child created explicitly in a:b and appended afterwards.
$dom1 = new DOMDocument;
$dom1->loadXML('<?xml version="1.0"?><test xmlns="a:b"/>');
var_dump($dom1->firstElementChild->namespaceURI);
$dom1->firstElementChild->appendChild($dom1->createElementNS('a:b', 'tag'));
var_dump($dom1->firstElementChild->firstElementChild->namespaceURI);
?>
--EXPECT--
-- Test append and attributes: with default namespace variation --
NULL
NULL
NULL
NULL
string(7) "some:ns"
<?xml version="1.0"?>
<with xmlns="some:ns"><a xmlns=""/><b xmlns=""/><c xmlns=""/><d xmlns:x="some:ns" xmlns="" x:attrib="val"/><e attrib1="val" attrib2="val"/></with>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it normal that the <d> tag here has 2 xmlns attributes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's xmlns:x and xmlns. xmlns:x defines the namespace for prefix x which is used for x:attrib. The reason it also has xmlns is for the default namespace because d itself is in the empty namespace. If it were not to define xmlns="" then d would instead be in some:ns (inherited from <with>).
It's a tricky test case :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, thanks for the explanation!

<?xml version="1.0"?>
<with xmlns="some:ns"><a xmlns=""/><b xmlns=""/><c xmlns=""/><d xmlns:x="some:ns" xmlns="" x:attrib="val"/><e attrib1="val" attrib2="val"/><subtree xmlns=""><subtreechild1 xmlns="some:ns"><subtreechild2 xmlns=""/></subtreechild1></subtree></with>
<?xml version="1.0"?>
<with xmlns="some:ns"><a xmlns=""/><b xmlns=""/><c xmlns=""/><d xmlns:x="some:ns" xmlns="" x:attrib="val"/><e attrib1="val" attrib2="val"/><subtree xmlns=""><subtreechild1 xmlns="some:ns"><subtreechild2 xmlns=""/></subtreechild1></subtree><child3 xmlns=""><child4/></child3><child5 xmlns=""/></with>
-- Test append and attributes: without default namespace variation --
NULL
NULL
NULL
NULL
string(7) "some:ns"
<?xml version="1.0"?>
<with><a/><b/><c/><d xmlns:x="some:ns" x:attrib="val"/><e xmlns="some:ns" attrib1="val" attrib2="val"/></with>
<?xml version="1.0"?>
<with><a/><b/><c/><d xmlns:x="some:ns" x:attrib="val"/><e xmlns="some:ns" attrib1="val" attrib2="val"/><subtree><subtreechild1 xmlns="some:ns"><subtreechild2 xmlns=""/></subtreechild1></subtree></with>
<?xml version="1.0"?>
<with><a/><b/><c/><d xmlns:x="some:ns" x:attrib="val"/><e xmlns="some:ns" attrib1="val" attrib2="val"/><subtree><subtreechild1 xmlns="some:ns"><subtreechild2 xmlns=""/></subtreechild1></subtree><child3><child4/></child3><child5/></with>
-- Test import --
NULL
<?xml version="1.0"?>
<with><none/></with>
NULL
<?xml version="1.0"?>
<with xmlns=""><none/></with>
NULL
<?xml version="1.0"?>
<with xmlns="some:ns"><none xmlns=""/></with>
NULL
<?xml version="1.0"?>
<with xmlns=""><none><div xmlns="some:ns"/></none></with>
string(7) "some:ns"
<?xml version="1.0"?>
<with xmlns="some:ns"><none><div xmlns=""/></none></with>
-- Namespace URI comparison --
string(3) "a:b"
string(3) "a:b"
string(3) "a:b"
string(3) "a:b"
NULL
<?xml version="1.0"?>
<parent xmlns="a:b"><child1/><child2 xmlns=""/></parent>
string(3) "a:b"
string(3) "a:b"