From 4c635679385c59fe7002dc995a6344ef7a82e5b4 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 4 Apr 2024 20:48:45 +0200 Subject: [PATCH] Fix serialization of entity references in attributes --- ext/dom/html5_serializer.c | 18 ++++++++---- ...rialize_entity_reference_in_attribute.phpt | 29 +++++++++++++++++++ ext/dom/xml_serializer.c | 28 +++++++++++++----- 3 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 ext/dom/tests/modern/common/serialize_entity_reference_in_attribute.phpt diff --git a/ext/dom/html5_serializer.c b/ext/dom/html5_serializer.c index 3503bb3afc3f5..2f5a12737e961 100644 --- a/ext/dom/html5_serializer.c +++ b/ext/dom/html5_serializer.c @@ -196,13 +196,21 @@ static zend_result dom_html5_serialize_element_start(dom_html5_serialize_context TRY(ctx->write_string(ctx->application_data, (const char *) attr->name)); } } + TRY(ctx->write_string_len(ctx->application_data, "=\"", strlen("=\""))); - xmlChar *content = xmlNodeGetContent((const xmlNode *) attr); - if (content != NULL) { - zend_result result = dom_html5_escape_string(ctx, (const char *) content, true); - xmlFree(content); - TRY(result); + + for (xmlNodePtr child = attr->children; child != NULL; child = child->next) { + if (child->type == XML_TEXT_NODE) { + if (child->content != NULL) { + TRY(dom_html5_escape_string(ctx, (const char *) child->content, true)); + } + } else if (child->type == XML_ENTITY_REF_NODE) { + TRY(ctx->write_string_len(ctx->application_data, "&", strlen("&"))); + TRY(dom_html5_escape_string(ctx, (const char *) child->name, true)); + TRY(ctx->write_string_len(ctx->application_data, ";", strlen(";"))); + } } + TRY(ctx->write_string_len(ctx->application_data, "\"", strlen("\""))); } diff --git a/ext/dom/tests/modern/common/serialize_entity_reference_in_attribute.phpt b/ext/dom/tests/modern/common/serialize_entity_reference_in_attribute.phpt new file mode 100644 index 0000000000000..79f92439bd5fa --- /dev/null 
+++ b/ext/dom/tests/modern/common/serialize_entity_reference_in_attribute.phpt @@ -0,0 +1,29 @@ +--TEST-- +Serialize entity reference within attribute +--EXTENSIONS-- +dom +--FILE-- + +]> + +XML); + +$el = $xml->documentElement->firstChild; +echo $xml->saveXML(), "\n"; + +$html = DOM\HTMLDocument::createEmpty(); +$html->append($html->importNode($el, true)); +echo $html->saveHTML(), "\n"; + +?> +--EXPECT-- + + +]> + + diff --git a/ext/dom/xml_serializer.c b/ext/dom/xml_serializer.c index a36e883fa6bc0..39fce7a51555a 100644 --- a/ext/dom/xml_serializer.c +++ b/ext/dom/xml_serializer.c @@ -542,6 +542,24 @@ static zend_always_inline int dom_xml_serialize_text_node(xmlOutputBufferPtr out return dom_xml_common_text_serialization(out, (const char *) text->content, false); } +static int dom_xml_serialize_attribute_node_value(xmlOutputBufferPtr out, xmlAttrPtr attr) +{ + TRY(xmlOutputBufferWriteString(out, (const char *) attr->name)); + TRY(xmlOutputBufferWrite(out, strlen("=\""), "=\"")); + for (xmlNodePtr child = attr->children; child != NULL; child = child->next) { + if (child->type == XML_TEXT_NODE) { + if (child->content != NULL) { + TRY(dom_xml_common_text_serialization(out, (const char *) child->content, true)); + } + } else if (child->type == XML_ENTITY_REF_NODE) { + TRY(xmlOutputBufferWrite(out, strlen("&"), "&")); + TRY(dom_xml_common_text_serialization(out, (const char *) child->name, true)); + TRY(xmlOutputBufferWrite(out, strlen(";"), ";")); + } + } + return xmlOutputBufferWrite(out, strlen("\""), "\""); +} + /* Spec says to do nothing, but that's inconsistent/wrong, see https://github.com/w3c/DOM-Parsing/issues/28 */ static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr attr) { @@ -549,10 +567,7 @@ static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr a TRY(xmlOutputBufferWriteString(out, (const char *) attr->ns->prefix)); TRY(xmlOutputBufferWrite(out, strlen(":"), ":")); } - TRY(xmlOutputBufferWriteString(out, 
(const char *) attr->name)); - TRY(xmlOutputBufferWrite(out, strlen("=\""), "=\"")); - TRY(dom_xml_common_text_serialization(out, (const char *) dom_get_attribute_value((xmlAttrPtr) attr), true)); - return xmlOutputBufferWrite(out, strlen("\""), "\""); + return dom_xml_serialize_attribute_node_value(out, (xmlAttrPtr) attr); } /* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-a-comment-node */ @@ -730,10 +745,7 @@ static int dom_xml_serialize_attributes( * => N/A */ /* 3.9. Append the following strings to result, in the order listed: */ - TRY(xmlOutputBufferWriteString(out, (const char *) attr->name)); - TRY(xmlOutputBufferWrite(out, strlen("=\""), "=\"")); - TRY(dom_xml_common_text_serialization(out, (const char *) dom_get_attribute_value(attr), true)); - TRY(xmlOutputBufferWrite(out, strlen("\""), "\"")); + TRY(dom_xml_serialize_attribute_node_value(out, attr)); /* propagate write failures, as the TRY-wrapped lines this replaces did */ } /* 4. Return the value of result.