Skip to content

Commit 6980eba

Browse files
committed
Support templated content
The template element in HTML 5 is special in the sense that it does not add its contents into the DOM tree, but instead keeps them in a separate document fragment (its "template contents"), much like a shadow DOM. Operations on the main DOM tree therefore do not touch the elements inside that document fragment. Closes GH-14906.
1 parent 8ad5c64 commit 6980eba

35 files changed

+723
-121
lines changed

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ PHP NEWS
66
. Fixed bug GH-14801 (Fix build for armv7). (andypost)
77
. Implemented property hooks RFC. (ilutov)
88

9+
- DOM:
10+
. Improve support for template elements. (nielsdos)
11+
912
- GD:
1013
. Check overflow/underflow for imagescale/imagefilter. (David Carlier)
1114

ext/dom/config.m4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ if test "$PHP_DOM" != "no"; then
2727
$LEXBOR_DIR/ns/ns.c \
2828
$LEXBOR_DIR/tag/tag.c"
2929
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \
30-
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
30+
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c private_data.c \
3131
domexception.c \
3232
parentnode/tree.c parentnode/css_selectors.c \
3333
processinginstruction.c cdatasection.c \

ext/dom/config.w32

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ if (PHP_DOM == "yes") {
88
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
99
) {
1010
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
11-
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
11+
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c private_data.c \
1212
domexception.c processinginstruction.c \
1313
cdatasection.c documentfragment.c domimplementation.c element.c inner_html_mixin.c \
1414
node.c characterdata.c documenttype.c \

ext/dom/document.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
2424
#include "php_dom.h"
2525
#include "namespace_compat.h"
26+
#include "private_data.h"
2627
#include "xml_serializer.h"
2728
#include "internal_helpers.h"
2829
#include "dom_properties.h"

ext/dom/domimplementation.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
2424
#include "php_dom.h"
2525
#include "namespace_compat.h"
26+
#include "private_data.h"
2627

2728
/*
2829
* class DOMImplementation

ext/dom/element.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "zend_enum.h"
2525
#include "php_dom.h"
2626
#include "namespace_compat.h"
27+
#include "private_data.h"
2728
#include "internal_helpers.h"
2829
#include "dom_properties.h"
2930
#include "token_list.h"
@@ -2030,6 +2031,16 @@ PHP_METHOD(Dom_Element, rename)
20302031
}
20312032
goto cleanup;
20322033
}
2034+
2035+
/* If we currently have a template but the new element type won't be a template, then reject the rename with INVALID_MODIFICATION_ERR because the template element hosts a document fragment. */
2036+
if (is_currently_html_ns && xmlStrEqual(nodep->name, BAD_CAST "template") && !xmlStrEqual(localname, BAD_CAST "template")) {
2037+
php_dom_throw_error_with_message(
2038+
INVALID_MODIFICATION_ERR,
2039+
"It is not possible to rename the template element because it hosts a document fragment",
2040+
/* strict */ true
2041+
);
2042+
goto cleanup;
2043+
}
20332044
}
20342045

20352046
php_libxml_invalidate_node_list_cache(intern->document);

ext/dom/html5_parser.c

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,13 @@
2222
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
2323
#include "php_dom.h"
2424
#include "html5_parser.h"
25+
#include "private_data.h"
2526
#include <lexbor/html/parser.h>
2627
#include <lexbor/html/interfaces/element.h>
28+
#include <lexbor/html/interfaces/template_element.h>
2729
#include <lexbor/dom/dom.h>
2830
#include <libxml/parserInternals.h>
2931
#include <libxml/HTMLtree.h>
30-
#include <Zend/zend.h>
3132

3233
#define WORK_LIST_INIT_SIZE 128
3334
/* libxml2 reserves 2 pointer-sized words for interned strings */
@@ -63,14 +64,20 @@ static unsigned short sanitize_line_nr(size_t line)
6364
return (unsigned short) line;
6465
}
6566

66-
static const php_dom_ns_magic_token *get_libxml_namespace_href(uintptr_t lexbor_namespace)
67+
struct lxml_ns {
68+
const php_dom_ns_magic_token *token;
69+
const char *href;
70+
size_t href_len;
71+
};
72+
73+
static struct lxml_ns get_libxml_namespace_href(uintptr_t lexbor_namespace)
6774
{
6875
if (lexbor_namespace == LXB_NS_SVG) {
69-
return php_dom_ns_is_svg_magic_token;
76+
return (struct lxml_ns) { php_dom_ns_is_svg_magic_token, ZEND_STRL(DOM_SVG_NS_URI) };
7077
} else if (lexbor_namespace == LXB_NS_MATH) {
71-
return php_dom_ns_is_mathml_magic_token;
78+
return (struct lxml_ns) { php_dom_ns_is_mathml_magic_token, ZEND_STRL(DOM_MATHML_NS_URI) };
7279
} else {
73-
return php_dom_ns_is_html_magic_token;
80+
return (struct lxml_ns) { php_dom_ns_is_html_magic_token, ZEND_STRL(DOM_XHTML_NS_URI) };
7481
}
7582
}
7683

@@ -102,11 +109,12 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
102109
xmlNodePtr root,
103110
bool compact_text_nodes,
104111
bool create_default_ns,
105-
php_dom_libxml_ns_mapper *ns_mapper
112+
php_dom_private_data *private_data
106113
)
107114
{
108115
lexbor_libxml2_bridge_status retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
109116

117+
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
110118
xmlNsPtr html_ns = php_dom_libxml_ns_mapper_ensure_html_ns(ns_mapper);
111119
xmlNsPtr xlink_ns = NULL;
112120
xmlNsPtr prefixed_xmlns_ns = NULL;
@@ -146,24 +154,47 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
146154
if (entering_namespace == LXB_NS_HTML) {
147155
current_lxml_ns = html_ns;
148156
} else {
149-
const php_dom_ns_magic_token *magic_token = get_libxml_namespace_href(entering_namespace);
150-
zend_string *uri = zend_string_init((char *) magic_token, strlen((char *) magic_token), false);
157+
struct lxml_ns ns = get_libxml_namespace_href(entering_namespace);
158+
zend_string *uri = zend_string_init(ns.href, ns.href_len, false);
151159
current_lxml_ns = php_dom_libxml_ns_mapper_get_ns(ns_mapper, NULL, uri);
152160
zend_string_release_ex(uri, false);
153161
if (EXPECTED(current_lxml_ns != NULL)) {
154-
current_lxml_ns->_private = (void *) magic_token;
162+
current_lxml_ns->_private = (void *) ns.token;
155163
}
156164
}
157165
}
158166
/* Instead of xmlSetNs() because we know the arguments are valid. Prevents overhead. */
159167
lxml_element->ns = current_lxml_ns;
160168

161-
for (lxb_dom_node_t *child_node = element->node.last_child; child_node != NULL; child_node = child_node->prev) {
169+
/* Handle template element by creating a fragment node to contain its children.
170+
* Other types of nodes contain their children directly. */
171+
xmlNodePtr lxml_child_parent = lxml_element;
172+
lxb_dom_node_t *child_node = element->node.last_child;
173+
if (lxb_html_tree_node_is(&element->node, LXB_TAG_TEMPLATE)) {
174+
if (create_default_ns) {
175+
lxml_child_parent = xmlNewDocFragment(lxml_doc);
176+
if (UNEXPECTED(lxml_child_parent == NULL)) {
177+
retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
178+
break;
179+
}
180+
181+
lxml_child_parent->parent = lxml_element;
182+
dom_add_element_ns_hook(private_data, lxml_element);
183+
php_dom_add_templated_content(private_data, lxml_element, lxml_child_parent);
184+
}
185+
186+
lxb_html_template_element_t *template = lxb_html_interface_template(&element->node);
187+
if (template->content != NULL) {
188+
child_node = template->content->node.last_child;
189+
}
190+
}
191+
192+
for (; child_node != NULL; child_node = child_node->prev) {
162193
lexbor_libxml2_bridge_work_list_item_push(
163194
&work_list,
164195
child_node,
165196
entering_namespace,
166-
lxml_element,
197+
lxml_child_parent,
167198
current_lxml_ns
168199
);
169200
}
@@ -307,7 +338,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
307338
xmlDocPtr *doc_out,
308339
bool compact_text_nodes,
309340
bool create_default_ns,
310-
php_dom_libxml_ns_mapper *ns_mapper
341+
php_dom_private_data *private_data
311342
)
312343
{
313344
xmlDocPtr lxml_doc = php_dom_create_html_doc();
@@ -320,7 +351,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
320351
(xmlNodePtr) lxml_doc,
321352
compact_text_nodes,
322353
create_default_ns,
323-
ns_mapper
354+
private_data
324355
);
325356
if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
326357
xmlFreeDoc(lxml_doc);
@@ -336,7 +367,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
336367
xmlNodePtr *fragment_out,
337368
bool compact_text_nodes,
338369
bool create_default_ns,
339-
php_dom_libxml_ns_mapper *ns_mapper
370+
php_dom_private_data *private_data
340371
)
341372
{
342373
xmlNodePtr fragment = xmlNewDocFragment(lxml_doc);
@@ -349,7 +380,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
349380
fragment,
350381
compact_text_nodes,
351382
create_default_ns,
352-
ns_mapper
383+
private_data
353384
);
354385
if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
355386
xmlFreeNode(fragment);

ext/dom/html5_parser.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,15 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
7171
xmlDocPtr *doc_out,
7272
bool compact_text_nodes,
7373
bool create_default_ns,
74-
php_dom_libxml_ns_mapper *ns_mapper
74+
php_dom_private_data *private_data
7575
);
7676
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
7777
lxb_dom_node_t *start_node,
7878
xmlDocPtr lxml_doc,
7979
xmlNodePtr *fragment_out,
8080
bool compact_text_nodes,
8181
bool create_default_ns,
82-
php_dom_libxml_ns_mapper *ns_mapper
82+
php_dom_private_data *private_data
8383
);
8484
void lexbor_libxml2_bridge_report_errors(
8585
const lexbor_libxml2_bridge_parse_context *ctx,

ext/dom/html5_serializer.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -289,9 +289,13 @@ static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, co
289289

290290
case XML_ELEMENT_NODE: {
291291
TRY(dom_html5_serialize_element_start(ctx, node));
292-
if (node->children) {
292+
const xmlNode *children = node->children;
293+
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) && xmlStrEqual(node->name, BAD_CAST "template")) {
294+
children = php_dom_retrieve_templated_content(ctx->private_data, node);
295+
}
296+
if (children) {
293297
if (!dom_html5_serializes_as_void(node)) {
294-
node = node->children;
298+
node = children;
295299
continue;
296300
}
297301
} else {
@@ -301,6 +305,14 @@ static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, co
301305
break;
302306
}
303307

308+
case XML_DOCUMENT_FRAG_NODE: {
309+
if (node->children) {
310+
node = node->children;
311+
continue;
312+
}
313+
break;
314+
}
315+
304316
/* Only exists for compatibility with XML and old DOM. */
305317
case XML_ENTITY_REF_NODE: {
306318
TRY(dom_html5_serialize_entity_ref(ctx, node));
@@ -346,10 +358,15 @@ zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode
346358
}
347359

348360
/* Step 2 not needed because we're not using a string to store the serialized data */
349-
/* Step 3 not needed because we don't support template contents yet */
361+
362+
/* Step 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node). */
363+
xmlNodePtr children = php_dom_retrieve_templated_content(ctx->private_data, node);
364+
if (!children) {
365+
children = node->children;
366+
}
350367

351368
/* Step 4 */
352-
return dom_html5_serialize_node(ctx, node->children, node);
369+
return dom_html5_serialize_node(ctx, children, node);
353370
}
354371

355372
/* Variant on the above that is equivalent to the "outer HTML". */

ext/dom/html5_serializer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919

2020
#include <Zend/zend_types.h>
2121
#include <libxml/tree.h>
22+
#include "private_data.h"
2223

2324
typedef struct {
2425
zend_result (*write_string)(void *application_data, const char *buf);
2526
zend_result (*write_string_len)(void *application_data, const char *buf, size_t len);
2627
void *application_data;
28+
php_dom_private_data *private_data;
2729
} dom_html5_serialize_context;
2830

2931
zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node);

ext/dom/html_document.c

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "html5_parser.h"
2626
#include "html5_serializer.h"
2727
#include "namespace_compat.h"
28+
#include "private_data.h"
2829
#include "dom_properties.h"
2930
#include <Zend/zend_smart_string.h>
3031
#include <lexbor/html/encoding.h>
@@ -879,15 +880,14 @@ PHP_METHOD(Dom_HTMLDocument, createFromString)
879880
}
880881

881882
php_dom_private_data *private_data = php_dom_private_data_create();
882-
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
883883

884884
xmlDocPtr lxml_doc;
885885
lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(
886886
document,
887887
&lxml_doc,
888888
options & XML_PARSE_COMPACT,
889889
!(options & DOM_HTML_NO_DEFAULT_NS),
890-
ns_mapper
890+
private_data
891891
);
892892
lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
893893
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
@@ -1071,15 +1071,14 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
10711071
}
10721072

10731073
private_data = php_dom_private_data_create();
1074-
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
10751074

10761075
xmlDocPtr lxml_doc;
10771076
lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(
10781077
document,
10791078
&lxml_doc,
10801079
options & XML_PARSE_COMPACT,
10811080
!(options & DOM_HTML_NO_DEFAULT_NS),
1082-
ns_mapper
1081+
private_data
10831082
);
10841083
lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
10851084
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
@@ -1206,7 +1205,7 @@ static zend_result dom_saveHTML_write_string(void *application_data, const char
12061205
return dom_saveHTML_write_string_len(application_data, buf, strlen(buf));
12071206
}
12081207

1209-
static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *docp, const xmlNode *node)
1208+
static zend_result dom_common_save(dom_output_ctx *output_ctx, dom_object *intern, const xmlDoc *docp, const xmlNode *node)
12101209
{
12111210
/* Initialize everything related to encoding & decoding */
12121211
const lxb_encoding_data_t *decoding_data = lxb_encoding_data(LXB_ENCODING_UTF_8);
@@ -1239,6 +1238,7 @@ static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *doc
12391238
ctx.write_string_len = dom_saveHTML_write_string_len;
12401239
ctx.write_string = dom_saveHTML_write_string;
12411240
ctx.application_data = output_ctx;
1241+
ctx.private_data = php_dom_get_private_data(intern);
12421242
if (UNEXPECTED(dom_html5_serialize_outer(&ctx, node) != SUCCESS)) {
12431243
return FAILURE;
12441244
}
@@ -1297,7 +1297,7 @@ PHP_METHOD(Dom_HTMLDocument, saveHtmlFile)
12971297
dom_output_ctx output_ctx;
12981298
output_ctx.output_data = stream;
12991299
output_ctx.write_output = dom_write_output_stream;
1300-
if (UNEXPECTED(dom_common_save(&output_ctx, docp, (const xmlNode *) docp) != SUCCESS)) {
1300+
if (UNEXPECTED(dom_common_save(&output_ctx, intern, docp, (const xmlNode *) docp) != SUCCESS)) {
13011301
php_stream_close(stream);
13021302
RETURN_FALSE;
13031303
}
@@ -1336,7 +1336,7 @@ PHP_METHOD(Dom_HTMLDocument, saveHtml)
13361336
output_ctx.output_data = &buf;
13371337
output_ctx.write_output = dom_write_output_smart_str;
13381338
/* Can't fail because dom_write_output_smart_str() can't fail. */
1339-
zend_result result = dom_common_save(&output_ctx, docp, node);
1339+
zend_result result = dom_common_save(&output_ctx, intern, docp, node);
13401340
ZEND_ASSERT(result == SUCCESS);
13411341

13421342
RETURN_STR(smart_str_extract(&buf));
@@ -1644,4 +1644,19 @@ zend_result dom_html_document_title_write(dom_object *obj, zval *newval)
16441644
return SUCCESS;
16451645
}
16461646

1647+
#if ZEND_DEBUG
1648+
PHP_METHOD(Dom_HTMLDocument, debugGetTemplateCount)
1649+
{
1650+
xmlDocPtr doc;
1651+
dom_object *intern;
1652+
1653+
ZEND_PARSE_PARAMETERS_NONE();
1654+
1655+
DOM_GET_OBJ(doc, ZEND_THIS, xmlDocPtr, intern);
1656+
ZEND_IGNORE_VALUE(doc);
1657+
1658+
RETURN_LONG((zend_long) php_dom_get_template_count((const php_dom_private_data *) intern->document->private_data));
1659+
}
1660+
#endif
1661+
16471662
#endif /* HAVE_LIBXML && HAVE_DOM */

0 commit comments

Comments
 (0)