From f6d544a4ec11efcc68d88b63a644607ef3a5c2fd Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 15 Sep 2024 14:16:00 +0200
Subject: [PATCH] Fix XML serializer errata: xmlns="" serialization should be
allowed
The spec doesn't want to serialize xmlns:foo="", but the description of
the step that checks this does not take into account that xmlns="" must
be allowed. This patch corrects this errata.
---
.../modern/xml/serialize_empty_xmlns.phpt | 25 +++++++++++++++++++
ext/dom/xml_serializer.c | 18 +++++++------
2 files changed, 35 insertions(+), 8 deletions(-)
create mode 100644 ext/dom/tests/modern/xml/serialize_empty_xmlns.phpt
diff --git a/ext/dom/tests/modern/xml/serialize_empty_xmlns.phpt b/ext/dom/tests/modern/xml/serialize_empty_xmlns.phpt
new file mode 100644
index 0000000000000..7770143b2248b
--- /dev/null
+++ b/ext/dom/tests/modern/xml/serialize_empty_xmlns.phpt
@@ -0,0 +1,25 @@
+--TEST--
+XML serializer spec errata: xmlns="" serialization should be allowed
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+// Should be allowed
+$dom = Dom\XMLDocument::createFromString('<root><x xmlns=""/></root>');
+var_dump($dom->documentElement->innerHTML);
+
+// Should not be allowed
+$dom = Dom\XMLDocument::createFromString('<root><x/></root>');
+$x = $dom->documentElement->firstChild;
+$x->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:a', '');
+try {
+ var_dump($dom->documentElement->innerHTML);
+} catch (DOMException $e) {
+ echo $e->getMessage(), "\n";
+}
+
+?>
+--EXPECT--
+string(13) "<x xmlns=""/>"
+The resulting XML serialization is not well-formed
diff --git a/ext/dom/xml_serializer.c b/ext/dom/xml_serializer.c
index 080f7ed30aa64..debbb41fdadeb 100644
--- a/ext/dom/xml_serializer.c
+++ b/ext/dom/xml_serializer.c
@@ -600,7 +600,7 @@ static int dom_xml_serialize_attribute_node_value(xmlOutputBufferPtr out, xmlAtt
/* These steps are from the attribute serialization algorithm's well-formed checks.
* Note that this does not return a boolean but an int to be compatible with the TRY/TRY_CLEANUP interface
* that we do for compatibility with libxml's interfaces. */
-static zend_always_inline int dom_xml_check_xmlns_attribute_requirements(const xmlAttr *attr)
+static zend_always_inline int dom_xml_check_xmlns_attribute_requirements(const xmlAttr *attr, const xmlChar *candidate_prefix)
{
const xmlChar *attr_value = dom_get_attribute_value(attr);
@@ -609,8 +609,9 @@ static zend_always_inline int dom_xml_check_xmlns_attribute_requirements(const x
return -1;
}
- /* 3.5.2.3. If the require well-formed flag is set and the value of attr's value attribute is the empty string */
- if (*attr_value == '\0') {
+ /* 3.5.2.3. If the require well-formed flag is set and the value of attr's value attribute is the empty string.
+ * Errata: an "xmlns" attribute is allowed but not one with a prefix, so the idea in the spec is right but the description isn't. */
+ if (*attr_value == '\0' && candidate_prefix != NULL) {
return -1;
}
@@ -790,15 +791,16 @@ static int dom_xml_serialize_attributes(
}
}
- if (require_well_formed) {
- /* 3.5.2.2 and 3.5.2.3 are done by this call. */
- TRY_OR_CLEANUP(dom_xml_check_xmlns_attribute_requirements(attr));
- }
-
/* 3.5.2.4. the attr's prefix matches the string "xmlns", then let candidate prefix be the string "xmlns". */
if (attr->ns->prefix != NULL && strcmp((const char *) attr->ns->prefix, "xmlns") == 0) {
candidate_prefix = BAD_CAST "xmlns";
}
+
+ /* Errata: step 3.5.2.3 can only really be checked if we already know the candidate prefix. */
+ if (require_well_formed) {
+ /* 3.5.2.2 and 3.5.2.3 are done by this call. */
+ TRY_OR_CLEANUP(dom_xml_check_xmlns_attribute_requirements(attr, candidate_prefix));
+ }
}
/* 3.5.3. Otherwise, the attribute namespace in not the XMLNS namespace. Run these steps: */
else if (candidate_prefix == NULL) { /* https://github.com/w3c/DOM-Parsing/issues/29 */