Skip to content

Commit 0ca7791

Browse files
committed
Support templated content
The template element in HTML 5 is special in the sense that it does not add its contents into the DOM tree, but instead keeps them in a separate shadow DOM document fragment. Interacting with the DOM tree cannot touch the elements in the document fragment.
1 parent 5b1b6d2 commit 0ca7791

23 files changed

+424
-86
lines changed

ext/dom/config.m4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ if test "$PHP_DOM" != "no"; then
2727
$LEXBOR_DIR/ns/ns.c \
2828
$LEXBOR_DIR/tag/tag.c"
2929
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \
30-
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
30+
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c private_data.c \
3131
domexception.c \
3232
parentnode/tree.c parentnode/css_selectors.c \
3333
processinginstruction.c cdatasection.c \

ext/dom/config.w32

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ if (PHP_DOM == "yes") {
88
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
99
) {
1010
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
11-
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
11+
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c private_data.c \
1212
domexception.c processinginstruction.c \
1313
cdatasection.c documentfragment.c domimplementation.c element.c inner_html_mixin.c \
1414
node.c characterdata.c documenttype.c \

ext/dom/domimplementation.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
2424
#include "php_dom.h"
2525
#include "namespace_compat.h"
26+
#include "private_data.h"
2627

2728
/*
2829
* class DOMImplementation

ext/dom/element.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "zend_enum.h"
2525
#include "php_dom.h"
2626
#include "namespace_compat.h"
27+
#include "private_data.h"
2728
#include "internal_helpers.h"
2829
#include "dom_properties.h"
2930
#include "token_list.h"
@@ -2030,6 +2031,11 @@ PHP_METHOD(Dom_Element, rename)
20302031
}
20312032
goto cleanup;
20322033
}
2034+
2035+
/* If we currently have a template but the new element type won't be a template, then throw away the templated content. */
2036+
if (is_currently_html_ns && xmlStrEqual(nodep->name, BAD_CAST "template") && !xmlStrEqual(localname, BAD_CAST "template")) {
2037+
php_dom_remove_templated_content(php_dom_get_private_data(intern), nodep);
2038+
}
20332039
}
20342040

20352041
php_libxml_invalidate_node_list_cache(intern->document);

ext/dom/html5_parser.c

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,13 @@
2222
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
2323
#include "php_dom.h"
2424
#include "html5_parser.h"
25+
#include "private_data.h"
2526
#include <lexbor/html/parser.h>
2627
#include <lexbor/html/interfaces/element.h>
28+
#include <lexbor/html/interfaces/template_element.h>
2729
#include <lexbor/dom/dom.h>
2830
#include <libxml/parserInternals.h>
2931
#include <libxml/HTMLtree.h>
30-
#include <Zend/zend.h>
3132

3233
#define WORK_LIST_INIT_SIZE 128
3334
/* libxml2 reserves 2 pointer-sized words for interned strings */
@@ -102,11 +103,12 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
102103
xmlNodePtr root,
103104
bool compact_text_nodes,
104105
bool create_default_ns,
105-
php_dom_libxml_ns_mapper *ns_mapper
106+
php_dom_private_data *private_data
106107
)
107108
{
108109
lexbor_libxml2_bridge_status retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
109110

111+
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
110112
xmlNsPtr html_ns = php_dom_libxml_ns_mapper_ensure_html_ns(ns_mapper);
111113
xmlNsPtr xlink_ns = NULL;
112114
xmlNsPtr prefixed_xmlns_ns = NULL;
@@ -158,12 +160,32 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
158160
/* Instead of xmlSetNs() because we know the arguments are valid. Prevents overhead. */
159161
lxml_element->ns = current_lxml_ns;
160162

161-
for (lxb_dom_node_t *child_node = element->node.last_child; child_node != NULL; child_node = child_node->prev) {
163+
/* Handle template element by creating a fragment node to contain its children.
164+
* Other types of nodes contain their children directly. */
165+
xmlNodePtr lxml_child_parent = lxml_element;
166+
lxb_dom_node_t *child_node = element->node.last_child;
167+
if (lxb_html_tree_node_is(&element->node, LXB_TAG_TEMPLATE)) {
168+
lxml_child_parent = xmlNewDocFragment(lxml_doc);
169+
if (UNEXPECTED(lxml_child_parent == NULL)) {
170+
retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
171+
break;
172+
}
173+
174+
lxml_child_parent->parent = lxml_element;
175+
php_dom_add_templated_content(private_data, lxml_element, lxml_child_parent);
176+
177+
lxb_html_template_element_t *template = lxb_html_interface_template(&element->node);
178+
if (template->content != NULL) {
179+
child_node = template->content->node.last_child;
180+
}
181+
}
182+
183+
for (; child_node != NULL; child_node = child_node->prev) {
162184
lexbor_libxml2_bridge_work_list_item_push(
163185
&work_list,
164186
child_node,
165187
entering_namespace,
166-
lxml_element,
188+
lxml_child_parent,
167189
current_lxml_ns
168190
);
169191
}
@@ -307,7 +329,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
307329
xmlDocPtr *doc_out,
308330
bool compact_text_nodes,
309331
bool create_default_ns,
310-
php_dom_libxml_ns_mapper *ns_mapper
332+
php_dom_private_data *private_data
311333
)
312334
{
313335
xmlDocPtr lxml_doc = php_dom_create_html_doc();
@@ -320,7 +342,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
320342
(xmlNodePtr) lxml_doc,
321343
compact_text_nodes,
322344
create_default_ns,
323-
ns_mapper
345+
private_data
324346
);
325347
if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
326348
xmlFreeDoc(lxml_doc);
@@ -336,7 +358,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
336358
xmlNodePtr *fragment_out,
337359
bool compact_text_nodes,
338360
bool create_default_ns,
339-
php_dom_libxml_ns_mapper *ns_mapper
361+
php_dom_private_data *private_data
340362
)
341363
{
342364
xmlNodePtr fragment = xmlNewDocFragment(lxml_doc);
@@ -349,7 +371,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
349371
fragment,
350372
compact_text_nodes,
351373
create_default_ns,
352-
ns_mapper
374+
private_data
353375
);
354376
if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
355377
xmlFreeNode(fragment);

ext/dom/html5_parser.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,15 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
7171
xmlDocPtr *doc_out,
7272
bool compact_text_nodes,
7373
bool create_default_ns,
74-
php_dom_libxml_ns_mapper *ns_mapper
74+
php_dom_private_data *private_data
7575
);
7676
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
7777
lxb_dom_node_t *start_node,
7878
xmlDocPtr lxml_doc,
7979
xmlNodePtr *fragment_out,
8080
bool compact_text_nodes,
8181
bool create_default_ns,
82-
php_dom_libxml_ns_mapper *ns_mapper
82+
php_dom_private_data *private_data
8383
);
8484
void lexbor_libxml2_bridge_report_errors(
8585
const lexbor_libxml2_bridge_parse_context *ctx,

ext/dom/html5_serializer.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -289,9 +289,13 @@ static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, co
289289

290290
case XML_ELEMENT_NODE: {
291291
TRY(dom_html5_serialize_element_start(ctx, node));
292-
if (node->children) {
292+
const xmlNode *children = node->children;
293+
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) && xmlStrEqual(node->name, BAD_CAST "template")) {
294+
children = php_dom_retrieve_templated_content(ctx->private_data, node);
295+
}
296+
if (children) {
293297
if (!dom_html5_serializes_as_void(node)) {
294-
node = node->children;
298+
node = children;
295299
continue;
296300
}
297301
} else {
@@ -301,6 +305,14 @@ static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, co
301305
break;
302306
}
303307

308+
case XML_DOCUMENT_FRAG_NODE: {
309+
if (node->children) {
310+
node = node->children;
311+
continue;
312+
}
313+
break;
314+
}
315+
304316
/* Only exists for compatibility with XML and old DOM. */
305317
case XML_ENTITY_REF_NODE: {
306318
TRY(dom_html5_serialize_entity_ref(ctx, node));
@@ -346,10 +358,15 @@ zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode
346358
}
347359

348360
/* Step 2 not needed because we're not using a string to store the serialized data */
349-
/* Step 3 not needed because we don't support template contents yet */
361+
362+
/* Step 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node). */
363+
xmlNodePtr children = php_dom_retrieve_templated_content(ctx->private_data, node);
364+
if (!children) {
365+
children = node->children;
366+
}
350367

351368
/* Step 4 */
352-
return dom_html5_serialize_node(ctx, node->children, node);
369+
return dom_html5_serialize_node(ctx, children, node);
353370
}
354371

355372
/* Variant on the above that is equivalent to the "outer HTML". */

ext/dom/html5_serializer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919

2020
#include <Zend/zend_types.h>
2121
#include <libxml/tree.h>
22+
#include "private_data.h"
2223

2324
typedef struct {
2425
zend_result (*write_string)(void *application_data, const char *buf);
2526
zend_result (*write_string_len)(void *application_data, const char *buf, size_t len);
2627
void *application_data;
28+
php_dom_private_data *private_data;
2729
} dom_html5_serialize_context;
2830

2931
zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node);

ext/dom/html_document.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "html5_parser.h"
2626
#include "html5_serializer.h"
2727
#include "namespace_compat.h"
28+
#include "private_data.h"
2829
#include "dom_properties.h"
2930
#include <Zend/zend_smart_string.h>
3031
#include <lexbor/html/encoding.h>
@@ -879,15 +880,14 @@ PHP_METHOD(Dom_HTMLDocument, createFromString)
879880
}
880881

881882
php_dom_private_data *private_data = php_dom_private_data_create();
882-
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
883883

884884
xmlDocPtr lxml_doc;
885885
lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(
886886
document,
887887
&lxml_doc,
888888
options & XML_PARSE_COMPACT,
889889
!(options & DOM_HTML_NO_DEFAULT_NS),
890-
ns_mapper
890+
private_data
891891
);
892892
lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
893893
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
@@ -1071,15 +1071,14 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
10711071
}
10721072

10731073
private_data = php_dom_private_data_create();
1074-
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
10751074

10761075
xmlDocPtr lxml_doc;
10771076
lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(
10781077
document,
10791078
&lxml_doc,
10801079
options & XML_PARSE_COMPACT,
10811080
!(options & DOM_HTML_NO_DEFAULT_NS),
1082-
ns_mapper
1081+
private_data
10831082
);
10841083
lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
10851084
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
@@ -1206,7 +1205,7 @@ static zend_result dom_saveHTML_write_string(void *application_data, const char
12061205
return dom_saveHTML_write_string_len(application_data, buf, strlen(buf));
12071206
}
12081207

1209-
static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *docp, const xmlNode *node)
1208+
static zend_result dom_common_save(dom_output_ctx *output_ctx, dom_object *intern, const xmlDoc *docp, const xmlNode *node)
12101209
{
12111210
/* Initialize everything related to encoding & decoding */
12121211
const lxb_encoding_data_t *decoding_data = lxb_encoding_data(LXB_ENCODING_UTF_8);
@@ -1239,6 +1238,7 @@ static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *doc
12391238
ctx.write_string_len = dom_saveHTML_write_string_len;
12401239
ctx.write_string = dom_saveHTML_write_string;
12411240
ctx.application_data = output_ctx;
1241+
ctx.private_data = php_dom_get_private_data(intern);
12421242
if (UNEXPECTED(dom_html5_serialize_outer(&ctx, node) != SUCCESS)) {
12431243
return FAILURE;
12441244
}
@@ -1297,7 +1297,7 @@ PHP_METHOD(Dom_HTMLDocument, saveHtmlFile)
12971297
dom_output_ctx output_ctx;
12981298
output_ctx.output_data = stream;
12991299
output_ctx.write_output = dom_write_output_stream;
1300-
if (UNEXPECTED(dom_common_save(&output_ctx, docp, (const xmlNode *) docp) != SUCCESS)) {
1300+
if (UNEXPECTED(dom_common_save(&output_ctx, intern, docp, (const xmlNode *) docp) != SUCCESS)) {
13011301
php_stream_close(stream);
13021302
RETURN_FALSE;
13031303
}
@@ -1336,7 +1336,7 @@ PHP_METHOD(Dom_HTMLDocument, saveHtml)
13361336
output_ctx.output_data = &buf;
13371337
output_ctx.write_output = dom_write_output_smart_str;
13381338
/* Can't fail because dom_write_output_smart_str() can't fail. */
1339-
zend_result result = dom_common_save(&output_ctx, docp, node);
1339+
zend_result result = dom_common_save(&output_ctx, intern, docp, node);
13401340
ZEND_ASSERT(result == SUCCESS);
13411341

13421342
RETURN_STR(smart_str_extract(&buf));

ext/dom/inner_html_mixin.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
6868
if (context_document->type == XML_HTML_DOCUMENT_NODE) {
6969
smart_str output = {0};
7070
dom_html5_serialize_context ctx;
71+
ctx.private_data = php_dom_get_private_data(obj);
7172
ctx.application_data = &output;
7273
ctx.write_string = dom_inner_html_write_string;
7374
ctx.write_string_len = dom_inner_html_write_string_len;
@@ -86,11 +87,12 @@ zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
8687
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler("UTF-8");
8788
xmlOutputBufferPtr out = xmlOutputBufferCreateIO(dom_write_smart_str, NULL, &str, handler);
8889
if (EXPECTED(out != NULL)) {
90+
php_dom_private_data *private_data = php_dom_get_private_data(obj);
8991
/* Note: the innerHTML mixin sets the well-formed flag to true. */
9092
xmlNodePtr child = node->children;
9193
status = 0;
9294
while (child != NULL && status == 0) {
93-
status = dom_xml_serialize(ctxt, out, child, false, true);
95+
status = dom_xml_serialize(ctxt, out, child, false, true, private_data);
9496
child = child->next;
9597
}
9698
status |= xmlOutputBufferFlush(out);
@@ -205,7 +207,7 @@ static xmlNodePtr dom_html_fragment_parsing_algorithm(dom_object *obj, xmlNodePt
205207
xmlNodePtr fragment = NULL;
206208
if (node != NULL) {
207209
/* node->last_child could be NULL, but that is allowed. */
208-
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert_fragment(node->last_child, context_node->doc, &fragment, true, true, php_dom_get_ns_mapper(obj));
210+
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert_fragment(node->last_child, context_node->doc, &fragment, true, true, php_dom_get_private_data(obj));
209211
if (UNEXPECTED(status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
210212
php_dom_throw_error(INVALID_STATE_ERR, true);
211213
}
@@ -349,6 +351,11 @@ zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)
349351
return FAILURE;
350352
}
351353

354+
xmlNodePtr template_content = php_dom_retrieve_templated_content(php_dom_get_private_data(obj), context_node);
355+
if (template_content != NULL) {
356+
context_node = template_content;
357+
}
358+
352359
/* If the context node is a template element, context_node was redirected above to its templated content fragment, so the children are replaced there instead of on the element itself. */
353360
dom_remove_all_children(context_node);
354361
return php_dom_pre_insert(obj->document, fragment, context_node, NULL) ? SUCCESS : FAILURE;

ext/dom/internal_helpers.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,12 @@ static zend_always_inline bool dom_is_document_cache_modified_since_parsing(php_
8989
return !doc_ptr || doc_ptr->cache_tag.modification_nr > dom_minimum_modification_nr_since_parsing(doc_ptr);
9090
}
9191

92+
/* Turn a pointer into an integer suitable for use as a hash table key.
 * The pointer's address bits are rotated right within a zend_ulong; no bits are
 * discarded, so distinct pointers yield distinct keys. The result is returned
 * converted to zend_long. */
static zend_always_inline zend_long dom_mangle_pointer_for_key(const void *ptr)
93+
{
94+
zend_ulong value = (zend_ulong) (uintptr_t) ptr;
95+
/* Rotate right by 4 bits when zend_long is 8 bytes wide, and by 3 bits otherwise.
 * This gives better hash distribution because the low 3/4 bits of a pointer are normally 0.
 * Note: despite the name "rol_amount", the shift pair below moves the low bits to the top,
 * i.e. it is a rotate-right. */
96+
const size_t rol_amount = (SIZEOF_ZEND_LONG == 8) ? 4 : 3;
97+
return (value >> rol_amount) | (value << (sizeof(value) * 8 - rol_amount));
98+
}
99+
92100
#endif

0 commit comments

Comments
 (0)