From 4c635679385c59fe7002dc995a6344ef7a82e5b4 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 4 Apr 2024 20:48:45 +0200
Subject: [PATCH] Fix serialization of entity references in attributes
---
ext/dom/html5_serializer.c | 18 ++++++++----
...rialize_entity_reference_in_attribute.phpt | 29 +++++++++++++++++++
ext/dom/xml_serializer.c | 28 +++++++++++++-----
3 files changed, 62 insertions(+), 13 deletions(-)
create mode 100644 ext/dom/tests/modern/common/serialize_entity_reference_in_attribute.phpt
diff --git a/ext/dom/html5_serializer.c b/ext/dom/html5_serializer.c
index 3503bb3afc3f5..2f5a12737e961 100644
--- a/ext/dom/html5_serializer.c
+++ b/ext/dom/html5_serializer.c
@@ -196,13 +196,21 @@ static zend_result dom_html5_serialize_element_start(dom_html5_serialize_context
TRY(ctx->write_string(ctx->application_data, (const char *) attr->name));
}
}
+
TRY(ctx->write_string_len(ctx->application_data, "=\"", strlen("=\"")));
- xmlChar *content = xmlNodeGetContent((const xmlNode *) attr);
- if (content != NULL) {
- zend_result result = dom_html5_escape_string(ctx, (const char *) content, true);
- xmlFree(content);
- TRY(result);
+
+ for (xmlNodePtr child = attr->children; child != NULL; child = child->next) {
+ if (child->type == XML_TEXT_NODE) {
+ if (child->content != NULL) {
+ TRY(dom_html5_escape_string(ctx, (const char *) child->content, true));
+ }
+ } else if (child->type == XML_ENTITY_REF_NODE) {
+ TRY(ctx->write_string_len(ctx->application_data, "&", strlen("&")));
+ TRY(dom_html5_escape_string(ctx, (const char *) child->name, true));
+ TRY(ctx->write_string_len(ctx->application_data, ";", strlen(";")));
+ }
}
+
TRY(ctx->write_string_len(ctx->application_data, "\"", strlen("\"")));
}
diff --git a/ext/dom/tests/modern/common/serialize_entity_reference_in_attribute.phpt b/ext/dom/tests/modern/common/serialize_entity_reference_in_attribute.phpt
new file mode 100644
index 0000000000000..79f92439bd5fa
--- /dev/null
+++ b/ext/dom/tests/modern/common/serialize_entity_reference_in_attribute.phpt
@@ -0,0 +1,29 @@
+--TEST--
+Serialize entity reference within attribute
+--EXTENSIONS--
+dom
+--FILE--
+
+]>
+
+XML);
+
+$el = $xml->documentElement->firstChild;
+echo $xml->saveXML(), "\n";
+
+$html = DOM\HTMLDocument::createEmpty();
+$html->append($html->importNode($el, true));
+echo $html->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+]>
+
+
diff --git a/ext/dom/xml_serializer.c b/ext/dom/xml_serializer.c
index a36e883fa6bc0..39fce7a51555a 100644
--- a/ext/dom/xml_serializer.c
+++ b/ext/dom/xml_serializer.c
@@ -542,6 +542,24 @@ static zend_always_inline int dom_xml_serialize_text_node(xmlOutputBufferPtr out
return dom_xml_common_text_serialization(out, (const char *) text->content, false);
}
+/* Emits `name="value"` for an attribute; any namespace prefix is left to the caller.
+ * The value is rebuilt from the attribute's child nodes: entity reference children
+ * are written back as `&name;`, text children with non-NULL content go through
+ * dom_xml_common_text_serialization(), and every other child type is skipped.
+ * Returns the result of the final xmlOutputBufferWrite(); earlier writes are
+ * guarded by TRY. */
+static int dom_xml_serialize_attribute_node_value(xmlOutputBufferPtr out, xmlAttrPtr attr)
+{
+	TRY(xmlOutputBufferWriteString(out, (const char *) attr->name));
+	TRY(xmlOutputBufferWrite(out, strlen("=\""), "=\""));
+
+	for (xmlNodePtr node = attr->children; node != NULL; node = node->next) {
+		if (node->type == XML_ENTITY_REF_NODE) {
+			/* Keep the reference itself instead of its expansion. */
+			TRY(xmlOutputBufferWrite(out, strlen("&"), "&"));
+			TRY(dom_xml_common_text_serialization(out, (const char *) node->name, true));
+			TRY(xmlOutputBufferWrite(out, strlen(";"), ";"));
+		} else if (node->type == XML_TEXT_NODE && node->content != NULL) {
+			TRY(dom_xml_common_text_serialization(out, (const char *) node->content, true));
+		}
+	}
+
+	return xmlOutputBufferWrite(out, strlen("\""), "\"");
+}
+
/* Spec says to do nothing, but that's inconsistent/wrong, see https://github.com/w3c/DOM-Parsing/issues/28 */
static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr attr)
{
@@ -549,10 +567,7 @@ static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr a
TRY(xmlOutputBufferWriteString(out, (const char *) attr->ns->prefix));
TRY(xmlOutputBufferWrite(out, strlen(":"), ":"));
}
- TRY(xmlOutputBufferWriteString(out, (const char *) attr->name));
- TRY(xmlOutputBufferWrite(out, strlen("=\""), "=\""));
- TRY(dom_xml_common_text_serialization(out, (const char *) dom_get_attribute_value((xmlAttrPtr) attr), true));
- return xmlOutputBufferWrite(out, strlen("\""), "\"");
+ return dom_xml_serialize_attribute_node_value(out, (xmlAttrPtr) attr);
}
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-a-comment-node */
@@ -730,10 +745,7 @@ static int dom_xml_serialize_attributes(
* => N/A */
/* 3.9. Append the following strings to result, in the order listed: */
- TRY(xmlOutputBufferWriteString(out, (const char *) attr->name));
- TRY(xmlOutputBufferWrite(out, strlen("=\""), "=\""));
- TRY(dom_xml_common_text_serialization(out, (const char *) dom_get_attribute_value(attr), true));
- TRY(xmlOutputBufferWrite(out, strlen("\""), "\""));
+ TRY(dom_xml_serialize_attribute_node_value(out, attr)); /* propagate output buffer write failures */
}
/* 4. Return the value of result.